Compare commits
130 Commits
2014.08.28
...
2014.09.10
Author | SHA1 | Date | |
---|---|---|---|
|
561266641f | ||
|
6899f2fe9e | ||
|
136c8bd275 | ||
|
1bf5423e82 | ||
|
2c5c1f48e9 | ||
|
d8e487fbd9 | ||
|
bc7ff0a8dd | ||
|
8e9da53140 | ||
|
f1d15e6dbc | ||
|
91ebb17ede | ||
|
c63b30901b | ||
|
f009f19ece | ||
|
68477e8839 | ||
|
0dc5365564 | ||
|
9face18d08 | ||
|
ff0ba8ce0f | ||
|
759c6293bd | ||
|
3fbeb95e14 | ||
|
6e25f51cdf | ||
|
321c1e44f9 | ||
|
cc7fec5818 | ||
|
5fb9077e8c | ||
|
8e20f81c5b | ||
|
e154762c74 | ||
|
ba92ab3d05 | ||
|
a2f0cdc074 | ||
|
70a1ecd2c1 | ||
|
88a23aef5a | ||
|
140d8d77b3 | ||
|
665cd96929 | ||
|
4d067a58ca | ||
|
1c1cff6a52 | ||
|
f063a04f07 | ||
|
af8812bb9b | ||
|
78149a962b | ||
|
f2d9e3a370 | ||
|
16e6f396b4 | ||
|
c6ec6b2e8b | ||
|
7bbc6428b6 | ||
|
c1a3c9ddb2 | ||
|
feec0f56f5 | ||
|
8029857d27 | ||
|
aa61802c1e | ||
|
f54aee0209 | ||
|
5df921b0e3 | ||
|
35d5b67876 | ||
|
674c869af4 | ||
|
10710ae386 | ||
|
a0e07d3161 | ||
|
88fc294f7f | ||
|
a232bb9551 | ||
|
eb833b7f5a | ||
|
f164038b79 | ||
|
f7a361c4f1 | ||
|
884ae74785 | ||
|
1dba4a2185 | ||
|
7d4d5f25ed | ||
|
33422c056d | ||
|
a7862a1bc8 | ||
|
3baa62e8d1 | ||
|
1bf8cf5c2c | ||
|
eade1d7eab | ||
|
1a94ff6865 | ||
|
b47ed50aaf | ||
|
1b8477729a | ||
|
ff6ade294c | ||
|
11fc065c57 | ||
|
94388f50b3 | ||
|
a444648202 | ||
|
7ca2e11f24 | ||
|
563f6dea59 | ||
|
e4039057be | ||
|
11342b54d6 | ||
|
49fa38adf2 | ||
|
9b330db7f0 | ||
|
d740f7e16f | ||
|
07e7dc4bdc | ||
|
4c59dc4c34 | ||
|
7260ea0705 | ||
|
35b1e44567 | ||
|
c9ea760e31 | ||
|
9ebf22b7d9 | ||
|
2582bebe06 | ||
|
c9cc0bf57b | ||
|
61edcfb0a2 | ||
|
a8be56ce3d | ||
|
329818484c | ||
|
8bdfddf641 | ||
|
36d65b61d4 | ||
|
7d48c06f27 | ||
|
d169e36f5c | ||
|
2d7af09487 | ||
|
48d4681efc | ||
|
9ea9b61448 | ||
|
04b4aa4a7b | ||
|
5a3f0d9aee | ||
|
1ed5b5c9c8 | ||
|
d10548b691 | ||
|
e990510e6b | ||
|
55f7bd2dcc | ||
|
f931e25959 | ||
|
ca9cd290c7 | ||
|
49e23e8b6a | ||
|
ae7246e7d5 | ||
|
43fd392413 | ||
|
3e7c12240c | ||
|
7eb21356f9 | ||
|
f30a38be8b | ||
|
2aebbccefc | ||
|
b170935a8f | ||
|
35241d05d1 | ||
|
be2dd0651e | ||
|
6a400a6339 | ||
|
7b53af7f70 | ||
|
ca7b3246b6 | ||
|
9c4c233b84 | ||
|
8a6c59865d | ||
|
1d57b2520c | ||
|
17b0b8a166 | ||
|
12c82cf9cb | ||
|
0bafcf6f46 | ||
|
bbc9dc56f6 | ||
|
72c65d39ff | ||
|
676e3ecf24 | ||
|
78272a076e | ||
|
723e04d0be | ||
|
08a36c3569 | ||
|
37709fae89 | ||
|
a81e4eb69d | ||
|
8e72edfb19 |
@@ -143,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||
|
||||
def test_ComedyCentralShows(self):
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
['ComedyCentralShows'])
|
||||
self.assertMatch(
|
||||
'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
['ComedyCentralShows'])
|
||||
|
||||
def test_yahoo_https(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2701
|
||||
self.assertMatch(
|
||||
|
59
test/test_cache.py
Normal file
59
test/test_cache.py
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import shutil
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.cache import Cache
|
||||
|
||||
|
||||
def _is_empty(d):
    """Return True when the directory *d* has no entries."""
    entries = os.listdir(d)
    return len(entries) == 0
|
||||
|
||||
|
||||
def _mkdir(d):
    """Create the directory *d* unless something already exists at that path."""
    if os.path.exists(d):
        return
    os.mkdir(d)
|
||||
|
||||
|
||||
class TestCache(unittest.TestCase):
    """Exercise Cache.load / store / remove against a scratch directory."""

    def setUp(self):
        # The scratch directory is <directory of this file>/testdata/cache_test.
        here = os.path.dirname(os.path.abspath(__file__))
        testdata_dir = os.path.join(here, 'testdata')
        _mkdir(testdata_dir)
        self.test_dir = os.path.join(testdata_dir, 'cache_test')
        # tearDown deletes test_dir when present, so the test starts without it.
        self.tearDown()

    def tearDown(self):
        if not os.path.exists(self.test_dir):
            return
        shutil.rmtree(self.test_dir)

    def test_cache(self):
        cache = Cache(FakeYDL({'cachedir': self.test_dir}))
        payload = {'x': 1, 'y': ['ä', '\\a', True]}

        # Nothing has been stored yet.
        self.assertEqual(cache.load('test_cache', 'k.'), None)

        cache.store('test_cache', 'k.', payload)
        self.assertEqual(cache.load('test_cache', 'k2'), None)
        self.assertFalse(_is_empty(self.test_dir))
        self.assertEqual(cache.load('test_cache', 'k.'), payload)
        self.assertEqual(cache.load('test_cache', 'y'), None)
        self.assertEqual(cache.load('test_cache2', 'k.'), None)

        # After remove() the directory is gone and loads miss again.
        cache.remove()
        self.assertFalse(os.path.exists(self.test_dir))
        self.assertEqual(cache.load('test_cache', 'k.'), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -167,21 +167,21 @@ def generator(test_case):
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||
self.assertTrue(tc_filename in finished_hook_called)
|
||||
expected_minsize = tc.get('file_minsize', 10000)
|
||||
if expected_minsize is not None:
|
||||
if params.get('test'):
|
||||
expected_minsize = max(expected_minsize, 10000)
|
||||
got_fsize = os.path.getsize(tc_filename)
|
||||
assertGreaterEqual(
|
||||
self, got_fsize, expected_minsize,
|
||||
'Expected %s to be at least %s, but it\'s only %s ' %
|
||||
(tc_filename, format_bytes(expected_minsize),
|
||||
format_bytes(got_fsize)))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
self.assertTrue(os.path.exists(info_json_fn))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
expected_minsize = tc.get('file_minsize', 10000)
|
||||
if expected_minsize is not None:
|
||||
if params.get('test'):
|
||||
expected_minsize = max(expected_minsize, 10000)
|
||||
got_fsize = os.path.getsize(tc_filename)
|
||||
assertGreaterEqual(
|
||||
self, got_fsize, expected_minsize,
|
||||
'Expected %s to be at least %s, but it\'s only %s ' %
|
||||
(tc_filename, format_bytes(expected_minsize),
|
||||
format_bytes(got_fsize)))
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
|
||||
|
@@ -211,6 +211,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('00:01:01'), 61)
|
||||
self.assertEqual(parse_duration('x:y'), None)
|
||||
self.assertEqual(parse_duration('3h11m53s'), 11513)
|
||||
self.assertEqual(parse_duration('3h 11m 53s'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
|
||||
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
|
||||
self.assertEqual(parse_duration('62m45s'), 3765)
|
||||
self.assertEqual(parse_duration('6m59s'), 419)
|
||||
self.assertEqual(parse_duration('49s'), 49)
|
||||
|
@@ -57,6 +57,7 @@ from .utils import (
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .downloader import get_suitable_downloader
|
||||
from .postprocessor import FFmpegMergerPP
|
||||
@@ -133,7 +134,7 @@ class YoutubeDL(object):
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
cachedir: Location of the cache files in the filesystem.
|
||||
None to disable filesystem cache.
|
||||
False to disable filesystem cache.
|
||||
noplaylist: Download single video instead of a playlist if in doubt.
|
||||
age_limit: An integer representing the user's age in years.
|
||||
Unsuitable videos for the given age are skipped.
|
||||
@@ -195,6 +196,7 @@ class YoutubeDL(object):
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
self._err_file = sys.stderr
|
||||
self.params = params
|
||||
self.cache = Cache(self)
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
|
@@ -74,6 +74,7 @@ __authors__ = (
|
||||
'Keith Beckman',
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -84,7 +85,6 @@ import optparse
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
|
||||
@@ -96,7 +96,6 @@ from .utils import (
|
||||
decodeOption,
|
||||
get_term_width,
|
||||
DownloadError,
|
||||
get_cachedir,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
read_batch_urls,
|
||||
@@ -518,10 +517,10 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
'--cache-dir', dest='cachedir', default=None, metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
filesystem.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
'--no-cache-dir', action='store_const', const=False, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
filesystem.add_option(
|
||||
'--rm-cache-dir', action='store_true', dest='rm_cachedir',
|
||||
@@ -872,20 +871,7 @@ def _real_main(argv=None):
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
if opts.cachedir is None:
|
||||
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
else:
|
||||
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
|
||||
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
|
||||
retcode = 141
|
||||
else:
|
||||
ydl.to_screen(
|
||||
u'Removing cache dir %s .' % opts.cachedir,
|
||||
skip_eol=True)
|
||||
if os.path.exists(opts.cachedir):
|
||||
ydl.to_screen(u'.', skip_eol=True)
|
||||
shutil.rmtree(opts.cachedir)
|
||||
ydl.to_screen(u'.')
|
||||
ydl.cache.remove()
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
|
94
youtube_dl/cache.py
Normal file
94
youtube_dl/cache.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import traceback
|
||||
|
||||
from .utils import (
|
||||
write_json_file,
|
||||
)
|
||||
|
||||
|
||||
class Cache(object):
    """Filesystem cache that keeps one JSON file per (section, key) pair.

    Entries live at ``<root>/<section>/<key>.<dtype>``. The root is taken
    from the ``cachedir`` entry of ``ydl.params``: ``None`` (or absent)
    selects the default location, ``False`` disables the cache, and any
    other value is used as the directory path.
    """

    def __init__(self, ydl):
        # ydl supplies ``params`` plus ``to_screen`` / ``report_warning``.
        self._ydl = ydl

    def _get_root_dir(self):
        """Return the cache root directory with ``~`` expanded."""
        res = self._ydl.params.get('cachedir')
        if res is None:
            # An empty XDG_CACHE_HOME must be treated like an unset one
            # (XDG Base Directory spec); otherwise the cache would end up
            # in a relative "youtube-dl" directory under the current
            # working directory.
            cache_root = os.environ.get('XDG_CACHE_HOME') or '~/.cache'
            res = os.path.join(cache_root, 'youtube-dl')
        return os.path.expanduser(res)

    def _get_cache_fn(self, section, key, dtype):
        """Return the file path for *key* in *section* with extension *dtype*.

        Both *section* and *key* may only contain ASCII letters, digits,
        ``_``, ``.`` and ``-``; anything else trips an assertion.
        """
        assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
            'invalid section %r' % section
        assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
        return os.path.join(
            self._get_root_dir(), section, '%s.%s' % (key, dtype))

    @property
    def enabled(self):
        """True unless ``cachedir`` is exactly ``False``."""
        return self._ydl.params.get('cachedir') is not False

    def store(self, section, key, data, dtype='json'):
        """Write *data* to the cache; failures are reported as warnings.

        Does nothing when the cache is disabled.
        """
        assert dtype in ('json',)

        if not self.enabled:
            return

        fn = self._get_cache_fn(section, key, dtype)
        try:
            try:
                os.makedirs(os.path.dirname(fn))
            except OSError as ose:
                # An already existing section directory is fine.
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(data, fn)
        except Exception:
            # Best effort: a failed cache write is only reported.
            tb = traceback.format_exc()
            self._ydl.report_warning(
                'Writing cache to %r failed: %s' % (fn, tb))

    def load(self, section, key, dtype='json', default=None):
        """Return the cached object for *key* in *section*, or *default*.

        *default* is returned when the cache is disabled, when opening the
        file raises IOError, or when the file does not parse as JSON (the
        latter also emits a warning).
        """
        assert dtype in ('json',)

        if not self.enabled:
            return default

        cache_fn = self._get_cache_fn(section, key, dtype)
        try:
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    return json.load(cachef)
            except ValueError:
                # Include the file size (or the stat error) in the warning.
                try:
                    file_size = os.path.getsize(cache_fn)
                except (OSError, IOError) as oe:
                    file_size = str(oe)
                self._ydl.report_warning(
                    'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
        except IOError:
            pass  # No cache available

        return default

    def remove(self):
        """Delete the whole cache directory.

        Only prints a notice when the cache is disabled. Raises Exception
        when the directory path contains neither 'cache' nor 'tmp', as a
        guard against deleting an unrelated directory.
        """
        if not self.enabled:
            self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
            return

        cachedir = self._get_root_dir()
        if not any((term in cachedir) for term in ('cache', 'tmp')):
            raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)

        self._ydl.to_screen(
            'Removing cache dir %s .' % cachedir, skip_eol=True)
        if os.path.exists(cachedir):
            self._ydl.to_screen('.', skip_eol=True)
            shutil.rmtree(cachedir)
        self._ydl.to_screen('.')
|
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
if tmpfilename != u'-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
|
@@ -4,6 +4,7 @@ from .addanime import AddAnimeIE
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
@@ -23,6 +24,7 @@ from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
@@ -65,9 +67,11 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .dump import DumpIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
@@ -83,8 +87,9 @@ from .ellentv import (
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .empflix import EmpflixIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
@@ -134,6 +139,8 @@ from .grooveshark import GroovesharkIE
|
||||
from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
@@ -194,6 +201,7 @@ from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .mooshare import MooshareIE
|
||||
@@ -230,6 +238,7 @@ from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
@@ -257,6 +266,7 @@ from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
@@ -288,6 +298,7 @@ from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -321,6 +332,7 @@ from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
@@ -340,6 +352,7 @@ from .theplatform import ThePlatformIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -365,6 +378,7 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
from .vh1 import VH1IE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
@@ -392,6 +406,7 @@ from .vine import (
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vporn import VpornIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
|
61
youtube_dl/extractor/anysex.py
Normal file
61
youtube_dl/extractor/anysex.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AnySexIE(InfoExtractor):
    """Information extractor for anysex.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://anysex.com/156592/',
        'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
        'info_dict': {
            'id': '156592',
            'ext': 'mp4',
            'title': 'Busty and sexy blondie in her bikini strips for you',
            'description': 'md5:de9e418178e2931c10b62966474e1383',
            'categories': ['Erotic'],
            'duration': 270,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and scrape the video metadata from it."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Mandatory fields: these searches are fatal when nothing matches.
        media_url = self._html_search_regex(
            r"video_url\s*:\s*'([^']+)'", page, 'video URL')
        page_title = self._html_search_regex(
            r'<title>(.*?)</title>', page, 'title')

        # Optional fields, looked up with fatal=False.
        summary = self._html_search_regex(
            r'<div class="description">([^<]+)</div>', page, 'description', fatal=False)
        preview = self._html_search_regex(
            r'preview_url\s*:\s*\'(.*?)\'', page, 'thumbnail', fatal=False)

        category_names = re.findall(
            r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', page)

        duration_text = self._search_regex(
            r'<b>Duration:</b> (\d+:\d+)', page, 'duration', fatal=False)
        seconds = parse_duration(duration_text)
        views_text = self._html_search_regex(
            r'<b>Views:</b> (\d+)', page, 'view count', fatal=False)
        views = int_or_none(views_text)

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': page_title,
            'description': summary,
            'thumbnail': preview,
            'categories': category_names,
            'duration': seconds,
            'view_count': views,
            'age_limit': 18,
        }
        return info
|
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
65
youtube_dl/extractor/beeg.py
Normal file
65
youtube_dl/extractor/beeg.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BeegIE(InfoExtractor):
    """Information extractor for beeg.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://beeg.com/5416503',
        'md5': '634526ae978711f6b748fe0dd6c11f57',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:6db3c6177972822aaba18652ff59c773',
            'categories': list,  # NSFW
            # Raw string: '\.' is an invalid escape sequence in a plain
            # literal (the resulting value is the same).
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict from it.

        Formats come from the page's ``qualityArr`` JavaScript object,
        whose entries are read as ``'<format id>': '<url>'`` pairs.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        quality_arr = self._search_regex(
            r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')

        formats = [{
            'url': fmt[1],
            'format_id': fmt[0],
            # The height is the format id minus its last character
            # (presumably ids look like "480p" - confirm against the site).
            'height': int(fmt[0][:-1]),
        } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]

        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')

        description = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            webpage, 'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'\'previewer.url\'\s*:\s*"([^"]*)"',
            webpage, 'thumbnail', fatal=False)

        # The keywords meta tag doubles as a comma-separated category list.
        categories_str = self._html_search_regex(
            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
        categories = (
            None if categories_str is None
            else categories_str.split(','))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'age_limit': 18,
        }
|
@@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
translation_table = {
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
@@ -13,6 +15,10 @@ translation_table = {
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
    """Map each character of *s* through ``_translation_table``.

    Characters without an entry in the table are kept unchanged.
    """
    decoded = [_translation_table.get(ch, ch) for ch in s]
    return ''.join(decoded)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
IE_NAME = 'cliphunter'
|
||||
|
||||
@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'file': '1012420.flv',
|
||||
'md5': '15e7740f30428abf70f4223478dc1225',
|
||||
'md5': 'a2ba71eebf523859fe527a61018f723e',
|
||||
'info_dict': {
|
||||
'id': '1012420',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'duration': 1317,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
pl_fiji = self._search_regex(
|
||||
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
|
||||
pl_c_qual = self._search_regex(
|
||||
r'pl_c_qual = "(.)"', webpage, 'video quality')
|
||||
video_title = self._search_regex(
|
||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||
|
||||
video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
|
||||
|
||||
video_url = _decode(pl_fiji)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': pl_c_qual,
|
||||
'format_id': 'default-%s' % pl_c_qual,
|
||||
}]
|
||||
|
||||
qualities_json = self._search_regex(
|
||||
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
|
||||
qualities_data = json.loads(qualities_json)
|
||||
|
||||
for i, t in enumerate(
|
||||
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
|
||||
quality_id, crypted_url = t
|
||||
video_url = _decode(crypted_url)
|
||||
f = {
|
||||
'format_id': quality_id,
|
||||
'url': video_url,
|
||||
'quality': i,
|
||||
}
|
||||
if quality_id in qualities_data:
|
||||
qd = qualities_data[quality_id]
|
||||
m = re.match(
|
||||
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
|
||||
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
|
||||
if m:
|
||||
f['width'] = int(m.group('width'))
|
||||
f['height'] = int(m.group('height'))
|
||||
f['tbr'] = int(m.group('tbr'))
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r"var\s+mov_thumb\s*=\s*'([^']+)';",
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||
(?:[?#].*|$)'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
@@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
|
||||
@@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
|
||||
'format_note': 'HTTP 400 at the moment (patches welcome!)',
|
||||
'preference': -100,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
|
@@ -677,9 +677,12 @@ class InfoExtractor(object):
|
||||
}
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
video, audio = codecs.split(',')
|
||||
f['vcodec'] = video.partition('.')[0]
|
||||
f['acodec'] = audio.partition('.')[0]
|
||||
# TODO: it looks like the video codec does not necessarily always come first
|
||||
va_codecs = codecs.split(',')
|
||||
if va_codecs[0]:
|
||||
f['vcodec'] = va_codecs[0].partition('.')[0]
|
||||
if len(va_codecs) > 1 and va_codecs[1]:
|
||||
f['acodec'] = va_codecs[1].partition('.')[0]
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
@@ -17,6 +18,7 @@ from ..utils import (
|
||||
intlist_to_bytes,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@@ -51,6 +53,26 @@ class CrunchyrollIE(InfoExtractor):
|
||||
'1080': ('80', '108'),
|
||||
}
|
||||
|
||||
def _login(self):
    """Log in to Crunchyroll when login credentials are available.

    Returns immediately (without any request) if ``_get_login_info``
    yields no username. Otherwise the login form is posted to the site's
    form handler; the response body is not used.
    """
    username, password = self._get_login_info()
    if username is None:
        # Nothing to log in with.
        return

    self.report_login()

    form_data = urlencode_postdata({
        'formname': 'RpcApiUser_Login',
        'name': username,
        'password': password,
    })
    request = compat_urllib_request.Request(
        'https://www.crunchyroll.com/?a=formhandler', form_data)
    request.add_header(
        'Content-Type', 'application/x-www-form-urlencoded')
    self._download_webpage(request, None, False, 'Wrong login info')
|
||||
|
||||
|
||||
def _real_initialize(self):
    """Run the login step as the extractor's initialization."""
    self._login()
|
||||
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(data)
|
||||
iv = bytes_to_intlist(iv)
|
||||
@@ -97,6 +119,75 @@ class CrunchyrollIE(InfoExtractor):
|
||||
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
|
||||
return output
|
||||
|
||||
def _convert_subtitles_to_ass(self, subtitles):
|
||||
output = ''
|
||||
|
||||
def ass_bool(strvalue):
|
||||
assvalue = '0'
|
||||
if strvalue == '1':
|
||||
assvalue = '-1'
|
||||
return assvalue
|
||||
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitles)
|
||||
if not sub_root:
|
||||
return output
|
||||
|
||||
output = '[Script Info]\n'
|
||||
output += 'Title: %s\n' % sub_root.attrib["title"]
|
||||
output += 'ScriptType: v4.00+\n'
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"]
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"]
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"]
|
||||
output += """ScaledBorderAndShadow: yes
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
"""
|
||||
for style in sub_root.findall('./styles/style'):
|
||||
output += 'Style: ' + style.attrib["name"]
|
||||
output += ',' + style.attrib["font_name"]
|
||||
output += ',' + style.attrib["font_size"]
|
||||
output += ',' + style.attrib["primary_colour"]
|
||||
output += ',' + style.attrib["secondary_colour"]
|
||||
output += ',' + style.attrib["outline_colour"]
|
||||
output += ',' + style.attrib["back_colour"]
|
||||
output += ',' + ass_bool(style.attrib["bold"])
|
||||
output += ',' + ass_bool(style.attrib["italic"])
|
||||
output += ',' + ass_bool(style.attrib["underline"])
|
||||
output += ',' + ass_bool(style.attrib["strikeout"])
|
||||
output += ',' + style.attrib["scale_x"]
|
||||
output += ',' + style.attrib["scale_y"]
|
||||
output += ',' + style.attrib["spacing"]
|
||||
output += ',' + style.attrib["angle"]
|
||||
output += ',' + style.attrib["border_style"]
|
||||
output += ',' + style.attrib["outline"]
|
||||
output += ',' + style.attrib["shadow"]
|
||||
output += ',' + style.attrib["alignment"]
|
||||
output += ',' + style.attrib["margin_l"]
|
||||
output += ',' + style.attrib["margin_r"]
|
||||
output += ',' + style.attrib["margin_v"]
|
||||
output += ',' + style.attrib["encoding"]
|
||||
output += '\n'
|
||||
|
||||
output += """
|
||||
[Events]
|
||||
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
"""
|
||||
for event in sub_root.findall('./events/event'):
|
||||
output += 'Dialogue: 0'
|
||||
output += ',' + event.attrib["start"]
|
||||
output += ',' + event.attrib["end"]
|
||||
output += ',' + event.attrib["style"]
|
||||
output += ',' + event.attrib["name"]
|
||||
output += ',' + event.attrib["margin_l"]
|
||||
output += ',' + event.attrib["margin_r"]
|
||||
output += ',' + event.attrib["margin_v"]
|
||||
output += ',' + event.attrib["effect"]
|
||||
output += ',' + event.attrib["text"]
|
||||
output += '\n'
|
||||
|
||||
return output
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
@@ -158,6 +249,7 @@ class CrunchyrollIE(InfoExtractor):
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
|
||||
video_id, note='Downloading subtitles for '+sub_name)
|
||||
@@ -174,7 +266,10 @@ class CrunchyrollIE(InfoExtractor):
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||
if sub_format == 'ass':
|
||||
subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle)
|
||||
else:
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
74
youtube_dl/extractor/dbtv.py
Normal file
74
youtube_dl/extractor/dbtv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
    """Information extractor for dbtv.no video pages.

    The numeric id from the URL is used to fetch a JSON document from
    ``http://api.dbtv.no/discovery/<id>``; metadata and formats are taken
    from the first entry of its ``playlist`` list.
    """

    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
    _TEST = {
        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
        'info_dict': {
            'id': '33100',
            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1404039863.438,
            'upload_date': '20140629',
            'duration': 69.544,
            'view_count': int,
            'categories': list,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the dbtv.no video addressed by ``url``.

        Raises KeyError/IndexError if the API document lacks the expected
        ``playlist``, ``renditions``, ``id``, ``title`` or ``desc`` entries.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        data = self._download_json(
            'http://api.dbtv.no/discovery/%s' % video_id, display_id)

        # Only the first playlist entry is used.
        video = data['playlist'][0]

        # One format per rendition that actually carries a URL.
        formats = [{
            'url': f['URL'],
            'vcodec': f.get('container'),
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
            'vbr': float_or_none(f.get('rate'), 1000),
            'filesize': int_or_none(f.get('size')),
        } for f in video['renditions'] if 'URL' in f]

        if not formats:
            # No usable renditions: fall back to the URLs stored directly
            # on the video entry.
            for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
                if url_key in video:
                    formats.append({
                        'url': video[url_key],
                        'format_id': format_id,
                    })

        self._sort_formats(formats)

        return {
            'id': video['id'],
            'display_id': display_id,
            'title': video['title'],
            'description': clean_html(video['desc']),
            'thumbnail': video.get('splash') or video.get('thumb'),
            'timestamp': float_or_none(video.get('publishedAt'), 1000),
            'duration': float_or_none(video.get('length'), 1000),
            'view_count': int_or_none(video.get('views')),
            'categories': video.get('tags'),
            'formats': formats,
        }
|
70
youtube_dl/extractor/drtuber.py
Normal file
70
youtube_dl/extractor/drtuber.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
    """Information extractor for drtuber.com video pages.

    All fields are scraped from the HTML of the video page itself.
    """

    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
    _TEST = {
        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
        'info_dict': {
            'id': '1740434',
            'display_id': 'hot-perky-blonde-naked-golf',
            'ext': 'mp4',
            'title': 'Hot Perky Blonde Naked Golf',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'categories': list,  # NSFW
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        The video URL and title searches are mandatory; the thumbnail,
        counters and categories are searched with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        video_url = self._html_search_regex(
            r'<source src="([^"]+)"', webpage, 'video URL')

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*Free', webpage, 'title')

        thumbnail = self._html_search_regex(
            r'poster="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        like_count = str_to_int(self._html_search_regex(
            r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
            webpage, 'like count', fatal=False))
        # The field name was previously 'like count' here (copy-paste
        # slip), which made the two searches indistinguishable in messages.
        dislike_count = str_to_int(self._html_search_regex(
            r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
            webpage, 'dislike count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'<span class="comments_count">([\d,\.]+)</span>',
            webpage, 'comment count', fatal=False))

        # Categories come from the keywords meta tag, split on spaces.
        cats_str = self._html_search_regex(
            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
        categories = None if cats_str is None else cats_str.split(' ')

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'categories': categories,
            'age_limit': self._rta_search(webpage),
        }
|
@@ -1,58 +1,25 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import fix_xml_ampersands
|
||||
from .tnaflix import TNAFlixIE
|
||||
|
||||
|
||||
class EmpflixIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
|
||||
class EMPFlixIE(TNAFlixIE):
|
||||
_VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
|
||||
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
|
||||
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
|
||||
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
|
||||
'info_dict': {
|
||||
'id': '33051',
|
||||
'display_id': 'Amateur-Finger-Fuck',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amateur Finger Fuck',
|
||||
'description': 'Amateur solo finger fucking.',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
|
||||
|
||||
cfg_url = self._html_search_regex(
|
||||
r'flashvars\.config = escape\("([^"]+)"',
|
||||
webpage, 'flashvars.config')
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, video_id, note='Downloading metadata',
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': item.find('videoLink').text,
|
||||
'format_id': item.find('res').text,
|
||||
} for item in cfg_xml.findall('./quality/item')
|
||||
]
|
||||
thumbnail = cfg_xml.find('./startThumb').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
75
youtube_dl/extractor/eporner.py
Normal file
75
youtube_dl/extractor/eporner.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
    """Information extractor for eporner.com video pages.

    The video page references a player-config script; that config is
    downloaded and its ``sources`` list is parsed into formats.
    """

    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
    _TEST = {
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '3b427ae4b9d60619106de3185c2987cd',
        'info_dict': {
            'id': '95008',
            'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
            'ext': 'flv',
            'title': 'Infamous Tiffany Teen Strip Tease Video',
            'duration': 194,
            'view_count': int,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        url_match = re.match(self._VALID_URL, url)
        video_id, display_id = url_match.group('id', 'display_id')

        webpage = self._download_webpage(url, display_id)
        title = self._html_search_regex(
            r'<title>(.*?) - EPORNER', webpage, 'title')

        # The page embeds a per-video code that addresses the player config.
        redirect_code = self._html_search_regex(
            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
            webpage, 'redirect_code')
        config_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
        player_code = self._download_webpage(
            config_url, display_id, note='Downloading player config')

        sources = self._search_regex(
            r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')

        formats = []
        source_pairs = re.findall(
            r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources)
        for video_url, format_id in source_pairs:
            entry = {
                'url': video_url,
                'format_id': format_id,
            }
            # A label starting with digits provides the height.
            leading_digits = re.search(r'^(\d+)', format_id)
            if leading_digits is not None:
                entry['height'] = int(leading_digits.group(1))
            formats.append(entry)
        self._sort_formats(formats)

        duration_text = self._search_regex(
            r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
            fatal=False)
        duration = parse_duration(duration_text)

        views_text = self._search_regex(
            r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(views_text)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
|
@@ -79,7 +79,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||
'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
|
||||
'h': self._search_regex(
|
||||
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
|
||||
'name_action_selected': 'dont_save',
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
|
@@ -366,7 +366,22 @@ class GenericIE(InfoExtractor):
|
||||
'extract_flat': False,
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
},
|
||||
# MLB embed
|
||||
{
|
||||
'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
|
||||
'md5': '96f09a37e44da40dd083e12d9a683327',
|
||||
'info_dict': {
|
||||
'id': '33322633',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ump changes call to ball',
|
||||
'description': 'md5:71c11215384298a172a6dcb4c2e20685',
|
||||
'duration': 48,
|
||||
'timestamp': 1401537900,
|
||||
'upload_date': '20140531',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -613,7 +628,7 @@ class GenericIE(InfoExtractor):
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
@@ -809,6 +824,12 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'SBS')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
|
56
youtube_dl/extractor/hornbunny.py
Normal file
56
youtube_dl/extractor/hornbunny.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class HornBunnyIE(InfoExtractor):
    """Information extractor for hornbunny.com video pages.

    The video page points to a settings document; the media URL is read
    from the ``flvMask`` entry of that document.
    """

    # 'https?' (optional "s"); the former 'http?' made the "p" optional
    # instead and therefore rejected https URLs.
    _VALID_URL = r'https?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
        'md5': '95e40865aedd08eff60272b704852ad7',
        'info_dict': {
            'id': '5227',
            'ext': 'flv',
            'title': 'panty slut jerk off instruction',
            'duration': 550,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Title, settings URL and media URL are mandatory searches; duration
        and view count are searched with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(
            url, video_id, note='Downloading initial webpage')
        title = self._html_search_regex(
            r'class="title">(.*?)</h2>', webpage, 'title')
        # Labelled 'redirect url' (was 'title') so a failure of this search
        # is reported under the right field name.
        redirect_url = self._html_search_regex(
            r'pg&settings=(.*?)\|0"\);', webpage, 'redirect url')
        webpage2 = self._download_webpage(redirect_url, video_id)
        video_url = self._html_search_regex(
            r'flvMask:(.*?);', webpage2, 'video_url')

        duration = parse_duration(self._search_regex(
            r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'<strong>Views:</strong>\s*(\d+)</div>',
            webpage, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'flv',
            'duration': duration,
            'view_count': view_count,
            'age_limit': 18,
        }
|
84
youtube_dl/extractor/hostingbulk.py
Normal file
84
youtube_dl/extractor/hostingbulk.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HostingBulkIE(InfoExtractor):
    """Information extractor for hostingbulk.com file pages.

    The file page contains a form of hidden inputs; re-posting that form
    to the same URL leads to the media URL, which is taken from the final
    URL of the response.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?hostingbulk\.com/
        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
    _TEST = {
        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
        'info_dict': {
            'id': 'n0ulw1hv20fm',
            'ext': 'mp4',
            'title': 'md5:5afeba33f48ec87219c269e054afd622',
            'filesize': 6816081,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the file page at ``url``.

        Raises ExtractorError (expected) when the page matches
        ``_FILE_DELETED_REGEX``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Normalise embed/sized URL variants to the plain file page.
        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)

        # Custom request with cookie to set language to English, so our file
        # deleted regex would work.
        request = compat_urllib_request.Request(
            url, headers={'Cookie': 'lang=english'})
        webpage = self._download_webpage(request, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
        filesize = int_or_none(
            self._search_regex(
                r'<small>\((\d+)\sbytes?\)</small>',
                webpage,
                'filesize',
                fatal=False
            )
        )
        thumbnail = self._search_regex(
            r'<img src="([^"]+)".+?class="pic"',
            webpage, 'thumbnail', fatal=False)

        # Collect the hidden form inputs as name -> value pairs.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            value="([^"]*)"
            ''', webpage))

        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
        request.add_header('Content-type', 'application/x-www-form-urlencoded')
        response = self._request_webpage(request, video_id,
                                         'Submiting download request')
        # The URL the response ends up at is used as the media URL.
        video_url = response.geturl()

        formats = [{
            'format_id': 'sd',
            'filesize': filesize,
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@@ -9,29 +9,50 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
|
||||
_STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:(?:www|m)\.)?izlesene\.com/
|
||||
(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
|
||||
'md5': '4384f9f0ea65086734b881085ee05ac2',
|
||||
'info_dict': {
|
||||
'id': '7599694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
|
||||
'description': 'md5:253753e2655dde93f59f74b572454f6d',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'pelikzzle',
|
||||
'timestamp': 1404298698,
|
||||
'upload_date': '20140702',
|
||||
'duration': 95.395,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
|
||||
'md5': '97f09b6872bffa284cb7fa4f6910cb72',
|
||||
'info_dict': {
|
||||
'id': '17997',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tarkan Dortmund 2006 Konseri',
|
||||
'description': 'Tarkan Dortmund 2006 Konseri',
|
||||
'thumbnail': 're:^http://.*\.jpg',
|
||||
'uploader_id': 'parlayankiz',
|
||||
'timestamp': 1163318593,
|
||||
'upload_date': '20061112',
|
||||
'duration': 253.666,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
|
||||
r"adduserUsername\s*=\s*'([^']+)';",
|
||||
webpage, 'uploader', fatal=False, default='')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
|
||||
if duration:
|
||||
duration /= 1000.0
|
||||
duration = float_or_none(self._html_search_regex(
|
||||
r'"videoduration"\s*:\s*"([^"]+)"',
|
||||
webpage, 'duration', fatal=False), scale=1000)
|
||||
|
||||
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
|
||||
comment_count = self._html_search_regex(
|
||||
r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
|
||||
r'comment_count\s*=\s*\'([^\']+)\';',
|
||||
webpage, 'comment_count', fatal=False)
|
||||
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, 'age limit', fatal=False)
|
||||
@@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):
|
||||
ext = determine_ext(content_url, 'mp4')
|
||||
|
||||
# Might be empty for some videos.
|
||||
qualities = self._html_search_regex(
|
||||
r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
|
||||
streams = self._html_search_regex(
|
||||
r'"qualitylevel"\s*:\s*"([^"]+)"',
|
||||
webpage, 'streams', fatal=False, default='')
|
||||
|
||||
formats = []
|
||||
for quality in qualities.split('|'):
|
||||
json = self._download_json(
|
||||
self._STREAM_URL.format(id=video_id, format=quality), video_id,
|
||||
note='Getting video URL for "%s" quality' % quality,
|
||||
errnote='Failed to get video URL for "%s" quality' % quality
|
||||
)
|
||||
if streams:
|
||||
for stream in streams.split('|'):
|
||||
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
|
||||
formats.append({
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
stream_url = self._search_regex(
|
||||
r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
|
||||
formats.append({
|
||||
'url': json.get('streamurl'),
|
||||
'format_id': 'sd',
|
||||
'url': stream_url,
|
||||
'ext': ext,
|
||||
'format_id': '%sp' % quality if quality else 'sd',
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
|
||||
_TESTS = [{
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
|
||||
_VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
@@ -69,6 +69,10 @@ class MLBIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
112
youtube_dl/extractor/moevideo.py
Normal file
112
youtube_dl/extractor/moevideo.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MoeVideoIE(InfoExtractor):
    """Information extractor for moevideo.net, playreplay.net and
    videochart.net video pages.

    Title, thumbnail and description come from the page's Open Graph data;
    the media link and technical details come from a POST request to the
    API at ``_API_URL``.
    """

    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
    _VALID_URL = r'''(?x)
        https?://(?P<host>(?:www\.)?
        (?:(?:moevideo|playreplay|videochart)\.net))/
        (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
    _API_URL = 'http://api.letitbit.net/'
    _API_KEY = 'tVL0gjqo5'
    _TESTS = [
        {
            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
            'info_dict': {
                'id': '00297.0036103fe3d513ef27915216fd29',
                'ext': 'flv',
                'title': 'Sink cut out machine',
                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 540,
                'height': 360,
                'duration': 179,
                'filesize': 17822500,
            }
        },
        {
            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
            'md5': '74f0a014d5b661f0f0e2361300d1620e',
            'info_dict': {
                'id': '77107.7f325710a627383d40540d8e991a',
                'ext': 'flv',
                'title': 'Operacion Condor.',
                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 480,
                'height': 296,
                'duration': 6027,
                'filesize': 588257923,
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Raises ExtractorError (expected) when the API response status is
        not ``'OK'``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always fetch the /video/ page, even for /framevideo/ URLs.
        webpage = self._download_webpage(
            'http://%s/video/%s' % (mobj.group('host'), video_id),
            video_id, 'Downloading webpage')

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # API request payload: [key, [method, arguments]].
        r = [
            self._API_KEY,
            [
                'preview/flv_link',
                {
                    'uid': video_id,
                },
            ],
        ]
        r_json = json.dumps(r)
        # Request data must be bytes on Python 3; the urlencoded form is
        # pure ASCII, so encoding it is lossless.
        post = compat_urllib_parse.urlencode({'r': r_json}).encode('ascii')
        req = compat_urllib_request.Request(self._API_URL, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        response = self._download_json(req, video_id)
        if response['status'] != 'OK':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, response['data']),
                expected=True
            )
        item = response['data'][0]
        video_url = item['link']
        duration = int_or_none(item['length'])
        width = int_or_none(item['width'])
        height = int_or_none(item['height'])
        filesize = int_or_none(item['convert_size'])

        formats = [{
            'format_id': 'sd',
            'http_headers': {'Range': 'bytes=0-'},  # Required to download
            'url': video_url,
            'width': width,
            'height': height,
            'filesize': filesize,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
|
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
|
||||
shortened_video_id = video_id.rpartition('/')[2]
|
||||
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -5,7 +5,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
unified_strdate,
|
||||
compat_str,
|
||||
)
|
||||
@@ -13,6 +16,8 @@ from ..utils import (
|
||||
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
@@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
    """Initialize the extractor by performing the login step."""
    self._login()
|
||||
|
||||
def _login(self):
    """Log in at ``self._LOGIN_URL`` with the configured credentials.

    Does nothing when no username is available.

    Raises:
        ExtractorError: if the JSON login response contains an
            ``'erreur'`` entry.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    login_form = {
        'a': 'login',
        'cookie': '1',
        'username': username,
        'password': password,
    }
    # urllib on Python 3 requires the POST body to be bytes.
    request = compat_urllib_request.Request(
        self._LOGIN_URL,
        compat_urllib_parse.urlencode(login_form).encode('utf-8'))
    request.add_header(
        'Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')

    login = self._download_json(
        request, None, 'Logging in as %s' % username)

    if 'erreur' in login:
        raise ExtractorError(
            'Unable to login: %s' % clean_html(login['erreur']),
            expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
65
youtube_dl/extractor/nosvideo.py
Normal file
65
youtube_dl/extractor/nosvideo.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
def _x(p):
    """Expand the ``xspf:`` prefix in the XPath *p* to the XSPF namespace."""
    return xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})


def _find(el, p):
    """Return the stripped text of the element matching *p* below *el*."""
    return el.find(_x(p)).text.strip()
|
||||
|
||||
|
||||
class NosVideoIE(InfoExtractor):
    """Information extractor for nosvideo.com video pages."""

    # Both parts are raw strings: '\?' is not a valid escape sequence in a
    # normal string literal.
    _VALID_URL = (
        r'https?://(?:www\.)?nosvideo\.com/'
        r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?')
    _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
    _TEST = {
        'url': 'http://nosvideo.com/?v=drlp6s40kg54',
        'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
        'info_dict': {
            'id': 'drlp6s40kg54',
            'ext': 'mp4',
            'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        The 'download1' form is POSTed to the video page, the XML id is
        read from the returned page, and title, file URL and image are
        taken from the first track of the XSPF playlist for that id.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        fields = {
            'id': video_id,
            'op': 'download1',
            'method_free': 'Continue to Video',
        }
        # urllib on Python 3 requires the POST body to be bytes.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            req, video_id, 'Downloading download page')

        xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
        playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
        playlist = self._download_xml(playlist_url, video_id)

        track = playlist.find(_x('.//xspf:track'))
        title = _find(track, './xspf:title')
        video_url = _find(track, './xspf:file')
        thumbnail = _find(track, './xspf:image')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
@@ -17,7 +18,7 @@ class NPOIE(InfoExtractor):
|
||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1220719',
|
||||
'ext': 'mp4',
|
||||
'ext': 'm4v',
|
||||
'title': 'Nieuwsuur',
|
||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||
'upload_date': '20140622',
|
||||
@@ -39,24 +40,32 @@ class NPOIE(InfoExtractor):
|
||||
video_id,
|
||||
note='Downloading token'
|
||||
)
|
||||
token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
|
||||
streams_info = self._download_json(
|
||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
|
||||
video_id
|
||||
)
|
||||
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||
|
||||
stream_info = self._download_json(
|
||||
streams_info['streams'][0] + '&type=json',
|
||||
video_id,
|
||||
'Downloading stream info'
|
||||
)
|
||||
formats = []
|
||||
quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
|
||||
for format_id in metadata['pubopties']:
|
||||
streams_info = self._download_json(
|
||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
||||
video_id, 'Downloading %s streams info' % format_id)
|
||||
stream_info = self._download_json(
|
||||
streams_info['streams'][0] + '&type=json',
|
||||
video_id, 'Downloading %s stream info' % format_id)
|
||||
if format_id == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream_info['url'],
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': metadata['titel'],
|
||||
'ext': 'mp4',
|
||||
'url': stream_info['url'],
|
||||
'description': metadata['info'],
|
||||
'thumbnail': metadata['images'][-1]['url'],
|
||||
'upload_date': unified_strdate(metadata['gidsdatum']),
|
||||
'formats': formats,
|
||||
}
|
||||
|
67
youtube_dl/extractor/promptfile.py
Normal file
67
youtube_dl/extractor/promptfile.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class PromptFileIE(InfoExtractor):
    """Information extractor for promptfile.com file pages."""

    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
    _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
    _TEST = {
        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
        'md5': 'd1451b6302da7215485837aaea882c4c',
        'info_dict': {
            'id': 'D21B4746E9-F01462F0FF',
            'ext': 'mp4',
            'title': 'Birds.mp4',
            # Raw string: '\.' is not a valid escape sequence otherwise.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        The landing page's hidden form fields are POSTed back to the same
        URL; the media URL, title and thumbnail are then scraped from the
        returned page.

        Raises:
            ExtractorError: if the landing page matches
                ``_FILE_NOT_FOUND_REGEX``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        # Collect every hidden input as a name -> value form field.
        fields = dict(re.findall(r'''(?x)type="hidden"\s+
            name="(.+?)"\s+
            value="(.*?)"
            ''', webpage))
        # urllib on Python 3 requires the POST body to be bytes.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'url:\s*\'([^\']+)\'', webpage, 'URL')
        title = self._html_search_regex(
            r'<span.+title="([^"]+)">', webpage, 'title')
        thumbnail = self._html_search_regex(
            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # The extension is derived from the title, not the media URL.
            'ext': determine_ext(title),
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
91
youtube_dl/extractor/sharesix.py
Normal file
91
youtube_dl/extractor/sharesix.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class ShareSixIE(InfoExtractor):
    """Information extractor for sharesix.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [
        {
            'url': 'http://sharesix.com/f/OXjQ7Y6',
            'md5': '9e8e95d8823942815a7d7c773110cc93',
            'info_dict': {
                'id': 'OXjQ7Y6',
                'ext': 'mp4',
                'title': 'big_buck_bunny_480p_surround-fix.avi',
                'duration': 596,
                'width': 854,
                'height': 480,
            },
        },
        {
            'url': 'http://sharesix.com/lfrwoxp35zdd',
            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
            'info_dict': {
                'id': 'lfrwoxp35zdd',
                'ext': 'flv',
                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
                'duration': 65,
                'width': 1280,
                'height': 720,
            },
        }
    ]

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        The "Free" form is POSTed to the video page; the media URL, file
        name, length and dimensions are scraped from the returned page.
        Width and height stay ``None`` when the page does not list them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        fields = {
            'method_free': 'Free'
        }
        # urllib on Python 3 requires the POST body to be bytes.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
        title = self._html_search_regex(
            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
        duration = parse_duration(
            self._search_regex(
                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
                webpage, 'duration', fatal=False))

        width = height = None
        size_match = re.search(
            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
                <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
            webpage)
        if size_match:
            width = int(size_match.group('width'))
            height = int(size_match.group('height'))

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'width': width,
            'height': height,
        }]

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }
|
70
youtube_dl/extractor/sunporno.py
Normal file
70
youtube_dl/extractor/sunporno.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class SunPornoIE(InfoExtractor):
    """Information extractor for sunporno.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.sunporno.com/videos/807778/',
        'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
        'info_dict': {
            'id': '807778',
            'ext': 'flv',
            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
            # Raw string: '\.' is not a valid escape sequence otherwise.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 302,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        All metadata is scraped from the video page; one format is built
        for every ``<source src="...">`` found, ranked by file extension
        through ``qualities(['mp4', 'flv'])``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')
        thumbnail = self._html_search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)

        duration = parse_duration(self._search_regex(
            r'<span>Duration: (\d+:\d+)</span>',
            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._html_search_regex(
            r'<span class="views">(\d+)</span>',
            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._html_search_regex(
            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))

        quality = qualities(['mp4', 'flv'])
        formats = []
        for video_url in re.findall(r'<source src="([^"]+)"', webpage):
            video_ext = determine_ext(video_url)
            formats.append({
                'url': video_url,
                'format_id': video_ext,
                'quality': quality(video_ext),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
u'playlist': [
|
||||
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
u'file': u'57758.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'file': u'57758-slides.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758-slides',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
],
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
talk_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, talk_id)
|
||||
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
|
||||
u'rtmp url')
|
||||
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, u'presenter play path')
|
||||
rtmp_url = self._search_regex(
|
||||
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
||||
play_path = self._search_regex(
|
||||
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, 'presenter play path')
|
||||
title = clean_html(get_element_by_attribute('class', 'title', webpage))
|
||||
video_info = {
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
|
||||
if m_slides is None:
|
||||
return video_info
|
||||
else:
|
||||
return [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'entries': [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
],
|
||||
}
|
||||
|
84
youtube_dl/extractor/tnaflix.py
Normal file
84
youtube_dl/extractor/tnaflix.py
Normal file
@@ -0,0 +1,84 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
fix_xml_ampersands,
|
||||
)
|
||||
|
||||
|
||||
class TNAFlixIE(InfoExtractor):
    """Information extractor for tnaflix.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'

    # When _TITLE_REGEX is None the Open Graph title is used instead.
    _TITLE_REGEX = None
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

    _TEST = {
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
        'info_dict': {
            'id': '553878',
            'display_id': 'Carmella-Decesare-striptease',
            'ext': 'mp4',
            'title': 'Carmella Decesare - striptease',
            'description': '',
            # Raw string: '\.' is not a valid escape sequence otherwise.
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 91,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Extract the video referenced by *url*.

        Title, description, age limit and duration come from the video
        page; the thumbnail and the list of formats come from the XML
        config whose URL is found via ``_CONFIG_REGEX``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        if self._TITLE_REGEX:
            title = self._html_search_regex(
                self._TITLE_REGEX, webpage, 'title')
        else:
            title = self._og_search_title(webpage)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEX, webpage, 'description',
            fatal=False, default='')

        age_limit = self._rta_search(webpage)

        duration = self._html_search_meta(
            'duration', webpage, 'duration', default=None)
        if duration:
            # The first character of the meta value is dropped before
            # parsing (presumably a leading 'T' marker -- TODO confirm).
            duration = parse_duration(duration[1:])

        cfg_url = self._html_search_regex(
            self._CONFIG_REGEX, webpage, 'flashvars.config')

        cfg_xml = self._download_xml(
            cfg_url, display_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)

        # The thumbnail is optional metadata: a config without a
        # <startThumb> element must not abort the extraction.
        thumb_el = cfg_xml.find('./startThumb')
        thumbnail = thumb_el.text if thumb_el is not None else None

        formats = []
        for item in cfg_xml.findall('./quality/item'):
            # Blank out the numeric value of the "speed" parameter.
            video_url = re.sub(
                r'speed=\d+', 'speed=', item.find('videoLink').text)
            format_id = item.find('res').text
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            # A leading number in the resolution label is used as height.
            height_match = re.search(r'^(\d+)', format_id)
            if height_match:
                fmt['height'] = int(height_match.group(1))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
|
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mikey',
|
||||
},
|
||||
'playlist_mincount': 9917,
|
||||
'playlist_mincount': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@@ -9,22 +11,29 @@ from .common import InfoExtractor
|
||||
class TudouIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
u'file': u'159448201.f4v',
|
||||
u'md5': u'140a49ed444bd22f93330985d8475fcb',
|
||||
u'info_dict': {
|
||||
u"title": u"卡马乔国足开大脚长传冲吊集锦"
|
||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
||||
'info_dict': {
|
||||
'id': '159448201',
|
||||
'ext': 'f4v',
|
||||
'title': '卡马乔国足开大脚长传冲吊集锦',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
|
||||
u'file': u'todo.mp4',
|
||||
u'md5': u'todo.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'todo.mp4',
|
||||
}, {
|
||||
'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
|
||||
'info_dict': {
|
||||
'id': '117049447',
|
||||
'ext': 'f4v',
|
||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
|
||||
'info_dict': {
|
||||
'title': 'todo.mp4',
|
||||
},
|
||||
u'add_ie': [u'Youku'],
|
||||
u'skip': u'Only works from China'
|
||||
'add_ie': ['Youku'],
|
||||
'skip': 'Only works from China'
|
||||
}]
|
||||
|
||||
def _url_for_id(self, id, quality = None):
|
||||
@@ -44,20 +53,22 @@ class TudouIE(InfoExtractor):
|
||||
if m and m.group(1):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': u'youku:' + m.group(1),
|
||||
'url': 'youku:' + m.group(1),
|
||||
'ie_key': 'Youku'
|
||||
}
|
||||
|
||||
title = self._search_regex(
|
||||
r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
|
||||
r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
|
||||
thumbnail_url = self._search_regex(
|
||||
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
||||
r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
||||
segments = json.loads(segs_json)
|
||||
# It looks like the keys are the arguments that have to be passed as
|
||||
# the hd field in the request url, we pick the higher
|
||||
quality = sorted(segments.keys())[-1]
|
||||
# Also, filter non-number qualities (see issue #3643).
|
||||
quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
|
||||
key=lambda k: int(k))[-1]
|
||||
parts = segments[quality]
|
||||
result = []
|
||||
len_parts = len(parts)
|
||||
@@ -67,12 +78,13 @@ class TudouIE(InfoExtractor):
|
||||
part_id = part['k']
|
||||
final_url = self._url_for_id(part_id, quality)
|
||||
ext = (final_url.split('?')[0]).split('.')[-1]
|
||||
part_info = {'id': part_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
part_info = {
|
||||
'id': '%s' % part_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
result.append(part_info)
|
||||
|
||||
return result
|
||||
|
@@ -5,80 +5,82 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class TvigleIE(InfoExtractor):
|
||||
IE_NAME = 'tvigle'
|
||||
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
||||
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
|
||||
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
|
||||
'md5': '09afba4616666249f087efc6dcf83cb3',
|
||||
'url': 'http://www.tvigle.ru/video/brat-2/',
|
||||
'md5': '72cb7eab33e54314e1790da402d3c9c3',
|
||||
'info_dict': {
|
||||
'id': '503081',
|
||||
'ext': 'flv',
|
||||
'id': '5119390',
|
||||
'display_id': 'brat-2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Брат 2 ',
|
||||
'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
|
||||
'upload_date': '20110919',
|
||||
'description': 'md5:5751f4fe345a58e1692585c361294bd8',
|
||||
'duration': 7356.369,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
|
||||
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
|
||||
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
|
||||
'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
|
||||
'info_dict': {
|
||||
'id': '676433',
|
||||
'ext': 'flv',
|
||||
'id': '5142516',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
|
||||
'description': 'md5:027f7dc872948f14c96d19b4178428a4',
|
||||
'upload_date': '20121218',
|
||||
'duration': 186.080,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
video_data = self._download_xml(
|
||||
'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video = video_data.find('./video')
|
||||
video_id = self._html_search_regex(
|
||||
r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
|
||||
|
||||
title = video.get('name')
|
||||
description = video.get('anons')
|
||||
if description:
|
||||
description = clean_html(description)
|
||||
thumbnail = video_data.get('img')
|
||||
upload_date = unified_strdate(video.get('date'))
|
||||
like_count = int_or_none(video.get('vtp'))
|
||||
video_data = self._download_json(
|
||||
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
|
||||
|
||||
item = video_data['playlist']['items'][0]
|
||||
|
||||
title = item['title']
|
||||
description = item['description']
|
||||
thumbnail = item['thumbnail']
|
||||
duration = float_or_none(item['durationMilliseconds'], 1000)
|
||||
age_limit = str_to_int(item['ageRestrictions'])
|
||||
|
||||
formats = []
|
||||
for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
|
||||
video_url = video.get(format_id)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'format_note': format_note,
|
||||
'quality': num,
|
||||
})
|
||||
|
||||
for vcodec, fmts in item['videos'].items():
|
||||
for quality, video_url in fmts.items():
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%s-%s' % (vcodec, quality),
|
||||
'vcodec': vcodec,
|
||||
'height': int(quality[:-1]),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'like_count': like_count,
|
||||
'age_limit': 18,
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,32 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
||||
'md5': '736f605cfdc96724d55bb543ab3ced24',
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'ext': 'mp4',
|
||||
'title': 'M!ss Yella',
|
||||
'description': 'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/index.php?id_video=437',
|
||||
'md5': '1ddddd6cccaae76f622ce29b8779636d',
|
||||
'info_dict': {
|
||||
'id': '437',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prix Louise Weiss 2014',
|
||||
'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
||||
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = []
|
||||
for file_path in files:
|
||||
format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
|
||||
formats.append({
|
||||
'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id)
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>UTV - (.*?)</', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
|
||||
thumbnail = self._search_regex(
|
||||
r'image: "(.*?)"', webpage, 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
}
|
||||
|
119
youtube_dl/extractor/vgtv.py
Normal file
119
youtube_dl/extractor/vgtv.py
Normal file
@@ -0,0 +1,119 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class VGTVIE(InfoExtractor):
    """Information extractor for vgtv.no video and live pages.

    The numeric asset id is taken from the URL fragment and resolved through
    the svp.vg.no asset API, which returns the metadata together with a
    ``streamUrls`` mapping (``hls``, ``hds`` and ``mp4`` entries are used).
    """
    _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
    _TESTS = [
        {
            # streamType: vod
            'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
            'md5': 'b8be7a234cebb840c0d512c78013e02f',
            'info_dict': {
                'id': '84196',
                'ext': 'mp4',
                'title': 'Hevnen er søt episode 10: Abu',
                'description': 'md5:e25e4badb5f544b04341e14abdc72234',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 648.000,
                'timestamp': 1404626400,
                'upload_date': '20140706',
                'view_count': int,
            },
        },
        {
            # streamType: wasLive
            'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
            'info_dict': {
                'id': '100764',
                'ext': 'mp4',
                'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
                'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 9056.000,
                'timestamp': 1410113864,
                'upload_date': '20140907',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # streamType: live
            'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
            'info_dict': {
                'id': '100015',
                'ext': 'mp4',
                'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
                'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 0,
                'timestamp': 1407423348,
                'upload_date': '20140807',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the vgtv.no asset referenced by *url*.

        Downloads the asset JSON, collects HLS, HDS and progressive MP4
        formats from its ``streamUrls`` entry and returns them together
        with the asset metadata. ``title`` and ``streamUrls`` are required
        (a missing key raises ``KeyError``); every other metadata field is
        optional and yields ``None`` when absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        data = self._download_json(
            'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
            video_id, 'Downloading media JSON')

        streams = data['streamUrls']

        formats = []

        hls_url = streams.get('hls')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))

        hds_url = streams.get('hds')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))

        mp4_url = streams.get('mp4')
        if mp4_url:
            variants_url = hls_url or hds_url
            if variants_url:
                # The manifest URL is split on commas; every piece carrying a
                # <width>_<height>_<bitrate> triple is treated as the file
                # name of one progressive variant that lives next to mp4_url
                # and shares its extension.
                base_url = mp4_url.rpartition('/')[0]
                ext = mp4_url.rpartition('.')[-1]
                for mp4_format in variants_url.split(','):
                    m = re.search(
                        r'(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
                    if not m:
                        continue
                    vbr = int(m.group('vbr'))
                    formats.append({
                        'url': '%s/%s.%s' % (base_url, mp4_format, ext),
                        'format_id': 'mp4-%s' % vbr,
                        'width': int(m.group('width')),
                        'height': int(m.group('height')),
                        'vbr': vbr,
                        'preference': 1,
                    })
            else:
                # No manifest to derive variants from: offer the advertised
                # MP4 URL itself instead of failing on a missing manifest.
                formats.append({
                    'url': mp4_url,
                    'format_id': 'mp4',
                    'preference': 1,
                })
        self._sort_formats(formats)

        # Only the title is mandatory; absent optional metadata must not
        # abort the extraction.
        thumbnail = (data.get('images') or {}).get('main')
        if thumbnail:
            thumbnail += '?t[]=900x506q80'

        return {
            'id': video_id,
            'title': data['title'],
            'description': data.get('description'),
            'thumbnail': thumbnail,
            'timestamp': data.get('published'),
            # The API value is divided by 1000 before being reported.
            'duration': float_or_none(data.get('duration'), 1000),
            'view_count': data.get('displays'),
            'formats': formats,
        }
|
99
youtube_dl/extractor/vporn.py
Normal file
99
youtube_dl/extractor/vporn.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class VpornIE(InfoExtractor):
    """Information extractor for vporn.com video pages.

    All metadata and the format URLs are scraped from the watch page with
    regular expressions; no separate API request is made.
    """
    _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
        'md5': 'facf37c1b86546fa0208058546842c55',
        'info_dict': {
            'id': '497944',
            'display_id': 'violet-on-her-th-birthday',
            'ext': 'mp4',
            'title': 'Violet on her 19th birthday',
            'description': 'Violet dances in front of the camera which is sure to get you horny.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'kileyGrope',
            'categories': ['Masturbation', 'Teen'],
            'duration': 393,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the vporn.com video page at *url*.

        Only the title is extracted fatally; every other field is searched
        with ``fatal=False`` and may be ``None`` in the result.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
        description = self._html_search_regex(
            r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail', fatal=False, default=None)
        if thumbnail:
            # The matched value is appended to the site root to form the URL.
            thumbnail = 'http://www.vporn.com' + thumbnail

        uploader = self._html_search_regex(
            r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
            webpage, 'uploader', fatal=False)

        categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)

        duration = parse_duration(self._search_regex(
            r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))

        view_count = str_to_int(self._html_search_regex(
            r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
            r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._html_search_regex(
            r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))

        formats = []

        # Each ``flashvars.videoUrl<suffix> = "<url>"`` assignment is one
        # format; the suffix becomes the format id.
        for format_id, video_url in re.findall(
                r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            # Resolution and bitrate are read from the file name when it
            # ends in ``_<width>x<height>_<bitrate>k.mp4``.
            m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
            if m:
                fmt.update({
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                    'vbr': int(m.group('vbr')),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'age_limit': 18,
            'formats': formats,
        }
|
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
'title': 'Sinkhole of bureaucracy',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
|
||||
'info_dict': {
|
||||
|
@@ -1,7 +1,5 @@
|
||||
# coding: utf-8
|
||||
|
||||
import errno
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import os.path
|
||||
@@ -21,7 +19,6 @@ from ..utils import (
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
get_cachedir,
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
@@ -30,7 +27,6 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
write_json_file,
|
||||
uppercase_escape,
|
||||
)
|
||||
|
||||
@@ -316,6 +312,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u"upload_date": u"20121002",
|
||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
||||
u"categories": [u'Science & Technology'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -433,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
func_id = '%s_%s_%s' % (
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
cache_dir = get_cachedir(self._downloader.params)
|
||||
|
||||
cache_enabled = cache_dir is not None
|
||||
if cache_enabled:
|
||||
cache_fn = os.path.join(os.path.expanduser(cache_dir),
|
||||
u'youtube-sigfuncs',
|
||||
func_id + '.json')
|
||||
try:
|
||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
cache_spec = json.load(cachef)
|
||||
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||
except IOError:
|
||||
pass # No cache available
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = str(oe)
|
||||
self._downloader.report_warning(
|
||||
u'Cache %s failed (%s)' % (cache_fn, file_size))
|
||||
cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
|
||||
if cache_spec is not None:
|
||||
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||
|
||||
if player_type == 'js':
|
||||
code = self._download_webpage(
|
||||
@@ -470,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
assert False, 'Invalid player type %r' % player_type
|
||||
|
||||
if cache_enabled:
|
||||
try:
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
try:
|
||||
os.makedirs(os.path.dirname(cache_fn))
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file(cache_spec, cache_fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._downloader.report_warning(
|
||||
u'Writing cache to %r failed: %s' % (cache_fn, tb))
|
||||
if cache_spec is None:
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
|
||||
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
@@ -784,7 +756,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
m_cat_container = get_element_by_id("eow-category", video_webpage)
|
||||
m_cat_container = self._search_regex(
|
||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||
video_webpage, 'categories', fatal=False)
|
||||
if m_cat_container:
|
||||
category = self._html_search_regex(
|
||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||
@@ -813,15 +787,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
video_description = u''
|
||||
|
||||
def _extract_count(klass):
|
||||
def _extract_count(count_name):
|
||||
count = self._search_regex(
|
||||
r'class="%s">([\d,]+)</span>' % re.escape(klass),
|
||||
video_webpage, klass, default=None)
|
||||
r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
|
||||
video_webpage, count_name, default=None)
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
like_count = _extract_count(u'likes-count')
|
||||
dislike_count = _extract_count(u'dislikes-count')
|
||||
like_count = _extract_count(u'like')
|
||||
dislike_count = _extract_count(u'dislike')
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
@@ -1051,21 +1025,26 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _ids_to_results(self, ids):
|
||||
return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||
for vid_id in ids]
|
||||
return [
|
||||
self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||
for vid_id in ids]
|
||||
|
||||
def _extract_mix(self, playlist_id):
|
||||
# The mixes are generated from a single video
|
||||
# the id of the playlist is just 'RD' + video_id
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
|
||||
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, u'Downloading Youtube mix')
|
||||
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
|
||||
title_span = (search_title('playlist-title') or
|
||||
search_title('title long-title') or search_title('title'))
|
||||
title_span = (
|
||||
search_title('playlist-title') or
|
||||
search_title('title long-title') or
|
||||
search_title('title'))
|
||||
title = clean_html(title_span)
|
||||
video_re = r'''(?x)data-video-username=".*?".*?
|
||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
|
||||
ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
|
||||
ids = orderedSet(re.findall(
|
||||
r'''(?xs)data-video-username=".*?".*?
|
||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
|
||||
webpage))
|
||||
url_results = self._ids_to_results(ids)
|
||||
|
||||
return self.playlist_result(url_results, playlist_id, title)
|
||||
@@ -1158,6 +1137,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
|
||||
msg = u'Downloading Youtube mix'
|
||||
if i > 0:
|
||||
msg += ', retry #%d' % i
|
||||
|
||||
webpage = self._download_webpage(url, title, msg)
|
||||
ids = orderedSet(re.findall(video_re, webpage))
|
||||
if ids:
|
||||
@@ -1430,12 +1410,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
paging = mobj.group('paging')
|
||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||
_FEED_NAME = 'subscriptions'
|
||||
_PLAYLIST_TITLE = u'Youtube Subscriptions'
|
||||
|
||||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
||||
IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
|
||||
@@ -1468,6 +1442,43 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
return self.url_result(playlist_id, 'YoutubePlaylist')
|
||||
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubePlaylistIE):
    """Extractor for the subscriptions feed of the logged-in YouTube user.

    Walks the feed page by page through its "load more" links and returns
    the collected video ids as a playlist.
    """
    IE_NAME = u'youtube:subscriptions'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'

    def _real_extract(self, url):
        """Return a playlist result with every video found in the feed."""
        title = u'Youtube Subscriptions'
        first_page = self._download_webpage(
            'https://www.youtube.com/feed/subscriptions', title)

        # Same procedure as for playlists, except that the video id regex
        # carries no index.
        video_ids = []
        # The first page serves both as content and as the place to look
        # for the "load more" link.
        content_html = first_page
        more_widget_html = first_page
        page_num = 1

        while True:
            video_ids.extend(orderedSet(re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)))

            more_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if more_link is None:
                # No further page is advertised.
                break

            more = self._download_json(
                'https://youtube.com/%s' % more_link.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
            page_num += 1

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(video_ids),
        }
|
||||
|
||||
|
||||
class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
IE_NAME = 'youtube:truncated_url'
|
||||
IE_DESC = False # Do not list
|
||||
|
@@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs):
|
||||
return bytes(xs)
|
||||
|
||||
|
||||
def get_cachedir(params=None):
    """Return the cache directory selected by *params*.

    If *params* has a ``cachedir`` key its value is returned unchanged,
    even when that value is ``None``. Otherwise the result is
    ``<cache root>/youtube-dl``, where the cache root is ``$XDG_CACHE_HOME``
    or, when that variable is unset, ``~/.cache``.

    *params* may be omitted or ``None``; both mean "no options".
    """
    # A literal {} default would be one dict shared by every call.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
|
||||
|
||||
|
||||
# Cross-platform file locking
|
||||
if sys.platform == 'win32':
|
||||
import ctypes.wintypes
|
||||
@@ -1141,10 +1135,10 @@ else:
|
||||
import fcntl
|
||||
|
||||
def _lock_file(f, exclusive):
|
||||
fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
|
||||
fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
|
||||
|
||||
def _unlock_file(f):
|
||||
fcntl.lockf(f, fcntl.LOCK_UN)
|
||||
fcntl.flock(f, fcntl.LOCK_UN)
|
||||
|
||||
|
||||
class locked_file(object):
|
||||
@@ -1318,9 +1312,10 @@ def str_or_none(v, default=None):
|
||||
|
||||
|
||||
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are removed from *int_str* before it is
    converted, so strings such as '1,234', '1.234' or '1,000+' are accepted.
    Returns None when *int_str* is None; raises ValueError when the cleaned
    string is not a valid integer.
    """
    if int_str is None:
        return None
    # One pass removes every separator character.
    int_str = re.sub(r'[,\.\+]', u'', int_str)
    return int(int_str)
|
||||
|
||||
|
||||
@@ -1332,8 +1327,10 @@ def parse_duration(s):
|
||||
if s is None:
|
||||
return None
|
||||
|
||||
s = s.strip()
|
||||
|
||||
m = re.match(
|
||||
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s)
|
||||
r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
|
||||
if not m:
|
||||
return None
|
||||
res = int(m.group('secs'))
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.08.28.2'
|
||||
__version__ = '2014.09.10.1'
|
||||
|
Reference in New Issue
Block a user