Compare commits

...

70 Commits

Author SHA1 Message Date
Sergey M․
b25459b88a release 2016.11.18 2016-11-18 00:25:24 +07:00
Sergey M․
5f75c4a4ad [ChangeLog] Actualize 2016-11-18 00:19:55 +07:00
Sergey M․
689f31fde5 [devscripts/create-github-release] Fill release body from ChangeLog (closes #11094) 2016-11-18 00:17:46 +07:00
Yen Chi Hsuan
582be35847 Update coding style after pycodestyle 2.1.0
In pycodestyle 2.1.0, E305 was introduced, which requires two blank
lines after top level declarations, too.

See https://github.com/PyCQA/pycodestyle/issues/400

See also #10689; thanks @stepshal for first mentioning this issue and
initial patches
2016-11-17 19:45:42 +08:00
Sergey M․
073d5bf583 [youtube:live] Relax _VALID_URL (closes #11164) 2016-11-16 23:15:19 +07:00
Yen Chi Hsuan
315cb86a95 Merge pull request #11210 from FooBarQuaxx/patch-2
Strip only args urls
2016-11-16 23:29:37 +08:00
FooBarQuaxx
b2fc1c4fb9 Add explanatory comment 2016-11-16 18:18:54 +03:00
Yen Chi Hsuan
d76767c90e [ChangeLog] Update after #11122 landed 2016-11-16 20:47:15 +08:00
Yen Chi Hsuan
eceba9f805 Merge pull request #11122 from kasper93/openload
[openload] Fix extraction.
2016-11-16 20:43:19 +08:00
MAA
d755396804 Strip only args urls 2016-11-16 09:00:30 +03:00
Sergey M․
58355a3bf1 [vlive] Add test for #11203 2016-11-15 22:11:47 +07:00
ping
49b69ad91c [vlive] Prefer locale over language for subtitles id 2016-11-15 22:07:17 +07:00
Sergey M․
6b4dfa2819 release 2016.11.14.1 2016-11-14 02:48:15 +07:00
Sergey M․
9f60134a9d [ChangeLog] Actualize 2016-11-14 02:46:12 +07:00
Sergey M․
b3d4bd05f9 release 2016.11.14 2016-11-14 02:39:50 +07:00
Sergey M․
dbffd00ba9 [ChangeLog] Actualize 2016-11-14 02:37:21 +07:00
Sergey M․
50913b8241 [nrk] Improve geo restriction detection 2016-11-13 22:29:36 +07:00
Sergey M․
7e08e2cab0 [nrk] Add X-Forwarded-For HTTP header in info dict 2016-11-13 22:28:29 +07:00
Sergey M․
690355551c [downoader/fragment,f4m,hls] Add internal support for custom HTTP headers 2016-11-13 22:22:10 +07:00
Sergey M․
754e6c8322 [nrk] Workaround geo restriction and improve error messages 2016-11-13 20:54:34 +07:00
Sergey M․
e58609b22c [afreecatv] Add support for vod.afreecatv.com (closes #11174) 2016-11-13 06:02:26 +07:00
Sergey M․
4ea4c0bb22 [extractor/common] Fix Bandwidth substitution in media template (closes #11175) 2016-11-13 05:43:34 +07:00
Kacper Michajłow
577281b0c6 [cda] Fix and improve extraction
Fixes #10929
2016-11-13 01:01:29 +07:00
Sergey M․
3d2729514f [plays] Improve extraction and add support for embed URLs 2016-11-12 23:08:05 +07:00
Sergey M․
f076d7972c [extractor/common] Improve thumbnail extraction from JSON-LD 2016-11-12 23:01:05 +07:00
cpm
8b1aeadc33 [plays] Fix extraction 2016-11-12 22:59:39 +07:00
Kacper Michajłow
95ad9ce573 [openload] Fix extraction.
aadecode code was restored from commit c1decda58c
with some optimizations (2x faster).

Fixes #10408
2016-11-11 15:36:57 +01:00
Kacper Michajłow
189935f159 [jsinterp] Fix function calls without arguments. 2016-11-11 15:36:57 +01:00
Sergey M․
bc40b3a5ba [eagleplatform] Fix extraction (closes #11160) 2016-11-11 03:26:29 +07:00
Yen Chi Hsuan
3eaaa8abac [audioboom] Recognize /posts/ URLs (closes #11149) 2016-11-10 14:52:34 +08:00
Sergey M․
db3367f43e release 2016.11.08.1 2016-11-08 22:30:53 +07:00
Sergey M․
6590925c27 [ChangeLog] Actualize 2016-11-08 22:29:16 +07:00
Sergey M․
4719af097c [extractors] Add forgotten import for espn:article 2016-11-08 22:27:02 +07:00
Sergey M․
9946aa5ccf [franceculture] Fix extraction (closes #11140) 2016-11-08 22:26:33 +07:00
Sergey M․
c58e07a7aa release 2016.11.08 2016-11-08 22:11:21 +07:00
Sergey M․
f700afa24c [ChangeLog] Actualize 2016-11-08 22:09:03 +07:00
Yen Chi Hsuan
5d47b38cf5 [tmz:article] Fix extraction (closes #11052) 2016-11-08 21:53:41 +08:00
Sergey M․
ebc7ab1e23 [espn] Fix extraction (closes #11041) 2016-11-08 00:29:12 +07:00
Sergey M․
97726317ac [README.md] Mention HTTP headers and alternative way to obtain cookies and headers in -g FAQ 2016-11-07 23:53:22 +07:00
DarkZeros
cb882540e8 [mitele] Fix extraction after website redesign (fixes #10824) 2016-11-07 11:13:59 +01:00
Sergey M․
98708e6cbd [ard] Remove age restriction check (closes #11129) 2016-11-06 23:20:15 +07:00
Sergey M․
b52c9ef165 [extractor/generic] Improve support for pornhub embeds (closes #11100) 2016-11-06 21:52:00 +07:00
Sergey M․
e28ed498e6 [extractor/generic] Add support for redtube embds (closes #11099) 2016-11-06 21:42:41 +07:00
Sergey M․
5021ca6c13 [redtube] Add support for embed URLs 2016-11-06 21:39:29 +07:00
Sergey M․
37e7a71c6c [extractor/generic] Add support for drtuber embds (closes #11098) 2016-11-06 21:33:51 +07:00
Sergey M․
f5c4b06f17 [drtuber] Fix title extraction 2016-11-06 21:29:15 +07:00
Sergey M․
519d897049 [drtuber] Add support for embed URLs 2016-11-06 21:28:51 +07:00
Sergey M․
b61cd51869 [yahoo] Add test and improve some content id regex 2016-11-06 21:16:33 +07:00
Sergey M․
f420902a3b [yahoo] Add another content id regex (closes #11088) 2016-11-06 21:14:15 +07:00
Sergey M․
de328af362 [toutv] Relax _VALID_URL (closes #11121) 2016-11-05 03:24:42 +07:00
Sergey M․
b30e4c2754 release 2016.11.04 2016-11-04 22:07:54 +07:00
Sergey M․
09ffe34b00 [ChangeLog] Actualize 2016-11-04 21:59:42 +07:00
Sergey M․
640aff1d0c [anvato] Improve formats extraction 2016-11-04 21:45:24 +07:00
Sergey M․
c897af8aac [cbslocal] Update test 2016-11-04 21:33:08 +07:00
Sergey M․
f3c705f8ec [fox9] Add extractor (closes #11110) 2016-11-04 21:32:30 +07:00
Sergey M․
f93ac1d175 [anvato] Extract more metadata 2016-11-04 21:17:56 +07:00
Sergey M․
c4c9b8440c [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 manifests (closes #11113) 2016-11-04 05:02:31 +07:00
Sergey M․
32f2627aed [vodlocker] Add another removed file pattern (closes #11106) 2016-11-03 22:22:40 +07:00
Sergey M․
9d64e1dcdc [downloader/ism] Fix typo 2016-11-03 22:15:09 +07:00
Remita Amine
10380e55de [downloader/ism] fix AVC Decoder Configuration Record creation in python 3 2016-11-03 16:08:57 +01:00
Remita Amine
22979993e7 [vice] add coding cookie 2016-11-03 16:07:22 +01:00
Remita Amine
b47ecd0b74 [vzaar] Add new extractor(closes #11093) 2016-11-03 12:50:41 +01:00
Yen Chi Hsuan
3a86b2c51e Ignore and clean .wav files 2016-11-03 18:55:55 +08:00
Remita Amine
b811b4c93b [vice] add support for uplynk preplay videos(#11101) 2016-11-03 10:37:07 +01:00
Remita Amine
f4dfa9a5ed [tubitv] fix extraction(closes #11061) 2016-11-03 09:04:20 +01:00
Remita Amine
3b4b66b50c [shahid] add support for authentication(closes #11091) 2016-11-03 00:44:12 +01:00
Sergey M․
4119a96ce5 [extractor/generic] Skip URLs we came from when delegating ISM extraction 2016-11-02 23:43:41 +07:00
Sergey M․
26aae56690 [extractor/generic] Improve ISM extraction 2016-11-02 23:34:37 +07:00
Remita Amine
4f9cd4d36f [radiocanada] extract subtitle(closes #11096) 2016-11-02 13:55:40 +01:00
Sergey M․
cc99a77ac1 [extractor/generic] Add support for ISM manifests 2016-11-02 03:01:13 +07:00
74 changed files with 1004 additions and 359 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.11.02
[debug] youtube-dl version 2016.11.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

1
.gitignore vendored
View File

@@ -30,6 +30,7 @@ updates_key.pem
*.m4v
*.mp3
*.3gp
*.wav
*.part
*.swp
test/testdata

View File

@@ -1,3 +1,71 @@
version 2016.11.18
Extractors
* [youtube:live] Relax _VALID_URL (#11164)
* [openload] Fix extraction (#10408, #11122)
* [vlive] Prefer locale over language for subtitles id (#11203)
version 2016.11.14.1
Core
+ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
* [extractor/common] Fix media templates with Bandwidth substitution pattern in
MPD manifests (#11175)
* [extractor/common] Improve thumbnail extraction from JSON-LD
Extractors
+ [nrk] Workaround geo restriction
+ [nrk] Improve error detection and messages
+ [afreecatv] Add support for vod.afreecatv.com (#11174)
* [cda] Fix and improve extraction (#10929, #10936)
* [plays] Fix extraction (#11165)
* [eagleplatform] Fix extraction (#11160)
+ [audioboom] Recognize /posts/ URLs (#11149)
version 2016.11.08.1
Extractors
* [espn:article] Fix support for espn.com articles
* [franceculture] Fix extraction (#11140)
version 2016.11.08
Extractors
* [tmz:article] Fix extraction (#11052)
* [espn] Fix extraction (#11041)
* [mitele] Fix extraction after website redesign (#10824)
- [ard] Remove age restriction check (#11129)
* [generic] Improve support for pornhub.com embeds (#11100)
+ [generic] Add support for redtube.com embeds (#11099)
+ [generic] Add support for drtuber.com embeds (#11098)
+ [redtube] Add support for embed URLs
+ [drtuber] Add support for embed URLs
+ [yahoo] Improve content id extraction (#11088)
* [toutv] Relax URL regular expression (#11121)
version 2016.11.04
Core
* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
manifests (#11113)
* [downloader/ism] Fix AVC Decoder Configuration Record
Extractors
+ [fox9] Add support for fox9.com (#11110)
+ [anvato] Extract more metadata and improve formats extraction
* [vodlocker] Improve removed videos detection (#11106)
+ [vzaar] Add support for vzaar.com (#11093)
+ [vice] Add support for uplynk preplay videos (#11101)
* [tubitv] Fix extraction (#11061)
+ [shahid] Add support for authentication (#11091)
+ [radiocanada] Add subtitles support (#11096)
+ [generic] Add support for ISM manifests
version 2016.11.02
Core

View File

@@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete

View File

@@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.

View File

@@ -25,5 +25,6 @@ def build_completion(opt_parser):
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
f.write(filled_template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@@ -2,11 +2,13 @@
from __future__ import unicode_literals
import base64
import io
import json
import mimetypes
import netrc
import optparse
import os
import re
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -90,16 +92,23 @@ class GitHubReleaser(object):
def main():
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
options, args = parser.parse_args()
if len(args) != 2:
if len(args) != 3:
parser.error('Expected a version and a build directory')
version, build_path = args
changelog_file, version, build_path = args
with io.open(changelog_file, encoding='utf-8') as inf:
changelog = inf.read()
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
body = mobj.group(1) if mobj else ''
releaser = GitHubReleaser()
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
new_release = releaser.create_release(
version, name='youtube-dl %s' % version, body=body)
release_id = new_release['id']
for asset in os.listdir(build_path):

View File

@@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(FISH_COMPLETION_FILE, 'w') as f:
f.write(filled_template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
out, _ = prog.communicate(secret_msg)
return out
iv = key = [0x20, 0x15] + 14 * [0]
r = openssl_encode('aes-128-cbc', key, iv)

View File

@@ -32,5 +32,6 @@ def main():
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
sitesf.write(template)
if __name__ == '__main__':
main()

View File

@@ -28,5 +28,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
if __name__ == '__main__':
main()

View File

@@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
s += make_valid_template.format(valid_url=ie._make_valid_url())
return s
# find the correct sorting and add the required base classes so that sublcasses
# can be correctly created
classes = _ALL_CLASSES[:-1]

View File

@@ -41,5 +41,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
if __name__ == '__main__':
main()

View File

@@ -74,5 +74,6 @@ def filter_options(readme):
return ret
if __name__ == '__main__':
main()

View File

@@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
ROOT=$(pwd)
python devscripts/create-github-release.py $version "$ROOT/build/$version"
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"

View File

@@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(ZSH_COMPLETION_FILE, "w") as f:
f.write(template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@@ -225,6 +225,7 @@
- **EroProfile**
- **Escapist**
- **ESPN**
- **ESPNArticle**
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
@@ -247,6 +248,7 @@
- **FootyRoom**
- **Formula1**
- **FOX**
- **FOX9**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
@@ -870,6 +872,7 @@
- **vube**: Vube.com
- **VuClip**
- **VyboryMos**
- **Vzaar**
- **Walla**
- **washingtonpost**
- **washingtonpost:article**

View File

@@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
if __name__ == '__main__':
unittest.main()

View File

@@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
if __name__ == '__main__':
unittest.main()

View File

@@ -60,6 +60,7 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
defs = gettestcases()
@@ -217,6 +218,7 @@ def generator(test_case):
return test_template
# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)

View File

@@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
_, stderr = p.communicate()
self.assertFalse(stderr)
if __name__ == '__main__':
unittest.main()

View File

@@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
# b'xn--fiq228c' is '中文'.encode('idna')
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
if __name__ == '__main__':
unittest.main()

View File

@@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
ie._login()
self.assertTrue('unable to log in:' in logger.messages[0])
if __name__ == '__main__':
unittest.main()

View File

@@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
}''')
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
def test_call(self):
jsi = JSInterpreter('''
function x() { return 2; }
function y(a) { return x() + a; }
function z() { return y(3); }
''')
self.assertEqual(jsi.call_function('z'), 5)
if __name__ == '__main__':
unittest.main()

View File

@@ -1075,5 +1075,6 @@ The first line
self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)
if __name__ == '__main__':
unittest.main()

View File

@@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
self.assertTrue(b'-p' in serr)
self.assertTrue(b'secret' not in serr)
if __name__ == '__main__':
unittest.main()

View File

@@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
super(YoutubeDL, self).__init__(*args, **kwargs)
self.to_stderr = self.to_screen
params = get_params({
'writeannotations': True,
'skip_download': True,
@@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
def tearDown(self):
try_rm(ANNOTATIONS_FILE)
if __name__ == '__main__':
unittest.main()

View File

@@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
for entry in result['entries']:
self.assertTrue(entry.get('title'))
if __name__ == '__main__':
unittest.main()

View File

@@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
setattr(TestSignature, test_func.__name__, test_func)
for test_spec in _TESTS:
make_tfunc(*test_spec)

View File

@@ -95,8 +95,7 @@ def _real_main(argv=None):
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
sys.exit('ERROR: batch file could not be read')
all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls]
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
@@ -450,4 +449,5 @@ def main(argv=None):
except KeyboardInterrupt:
sys.exit('\nERROR: Interrupted by user')
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']

View File

@@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
return plaintext
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
@@ -328,4 +329,5 @@ def inc(data):
break
return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:

View File

@@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD):
class AVconvFD(FFmpegFD):
pass
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()

View File

@@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
@@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
try:
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
success = ctx['dl'].download(frag_filename, {
'url': url_parsed.geturl(),
'http_headers': info_dict.get('http_headers'),
})
if not success:
return False
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')

View File

@@ -9,6 +9,7 @@ from ..utils import (
error_to_compat_str,
encodeFilename,
sanitize_open,
sanitized_Request,
)
@@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
def report_skip_fragment(self, fragment_name):
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx):
self._prepare_frag_download(ctx)
self._start_frag_download(ctx)

View File

@@ -59,7 +59,8 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(man_url).read()
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
s = manifest.decode('utf-8', 'ignore')
@@ -112,7 +113,10 @@ class HlsFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
success = ctx['dl'].download(frag_filename, {'url': frag_url})
success = ctx['dl'].download(frag_filename, {
'url': frag_url,
'http_headers': info_dict.get('http_headers'),
})
if not success:
return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb')

View File

@@ -129,7 +129,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u1616.pack(params['sampling_rate'])
if fourcc == 'AACL':
smaple_entry_box = box(b'mp4a', sample_entry_payload)
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
@@ -149,9 +149,7 @@ def write_piff_header(stream, params):
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1] # avc profile indication
avcc_payload += sps[2] # profile compatibility
avcc_payload += sps[3] # avc level indication
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
@@ -160,8 +158,8 @@ def write_piff_header(stream, params):
avcc_payload += u16.pack(len(pps))
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
smaple_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += smaple_entry_box
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box

View File

@@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
update_url_query,
xpath_element,
xpath_text,
)
@@ -18,12 +19,18 @@ from ..utils import (
class AfreecaTVIE(InfoExtractor):
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)^
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html))
\?.*?\bnTitleNo=(?P<id>\d+)'''
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/PLAYER/STATION/
)
(?P<id>\d+)
'''
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}]
@staticmethod
@@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php'))
video_xml = self._download_xml(info_url, video_id)
video_xml = self._download_xml(
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
if xpath_element(video_xml, './track/video/file') is None:
raise ExtractorError('Specified AfreecaTV video does not exist',

View File

@@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps(payload).encode('utf-8'))
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
video_id = anvplayer_data['video']
access_key = anvplayer_data['accessKey']
def _get_anvato_videos(self, access_key, video_id):
video_data = self._get_video_json(access_key, video_id)
formats = []
for published_url in video_data['published_urls']:
video_url = published_url['embed_url']
media_format = published_url.get('format')
ext = determine_ext(video_url)
if ext == 'smil':
if ext == 'smil' or media_format == 'smil':
formats.extend(self._extract_smil_formats(video_url, video_id))
continue
@@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8':
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
# Not using _extract_m3u8_formats here as individual media
# playlists are also included in published_urls.
if tbr is None:
@@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif ext == 'mp3':
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
a_format.update({
@@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
'formats': formats,
'title': video_data.get('def_title'),
'description': video_data.get('def_description'),
'tags': video_data.get('def_tags', '').split(','),
'categories': video_data.get('categories'),
'thumbnail': video_data.get('thumbnail'),
'timestamp': int_or_none(video_data.get(
'ts_published') or video_data.get('ts_added')),
'uploader': video_data.get('mcp_id'),
'duration': int_or_none(video_data.get('duration')),
'subtitles': subtitles,
}
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
return self._get_anvato_videos(
anvplayer_data['accessKey'], anvplayer_data['video'])

View File

@@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor):
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
'Video %s is no longer available'),
('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.',
'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'),
)
for pattern, message in ERRORS:

View File

@@ -6,8 +6,8 @@ from ..utils import float_or_none
class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'info_dict': {
@@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
'uploader': 'Steve Czaban',
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
}
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -22,6 +22,7 @@ class CBSLocalIE(AnvatoIE):
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
@@ -35,6 +36,7 @@ class CBSLocalIE(AnvatoIE):
'Syndication\\Curb.tv',
'Content\\News'
],
'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed

View File

@@ -5,14 +5,16 @@ import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
ExtractorError,
parse_duration
float_or_none,
int_or_none,
parse_duration,
)
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_BASE_URL = 'http://www.cda.pl/'
_TESTS = [{
'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970',
@@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
'ext': 'mp4',
'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
'description': 'md5:269ccd135d550da90d1662651fcb9772',
'thumbnail': 're:^https?://.*\.jpg$',
'average_rating': float,
'duration': 39
}
}, {
@@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
'id': '57413289',
'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze',
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
'duration': 137
}
}, {
@@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
formats = []
uploader = self._search_regex(r'''(?x)
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
view_count = self._search_regex(
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value')
info_dict = {
'id': video_id,
'title': title,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'duration': None,
}
def extract_format(page, version):
unpacked = decode_packed_codes(page)
format_url = self._search_regex(
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
'%s url' % version, fatal=False, group='url')
if not format_url:
json_str = self._search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
'%s player_json' % version, fatal=False, group='player_data')
if not json_str:
return
player_data = self._parse_json(
json_str, '%s player_data' % version, fatal=False)
if not player_data:
return
video = player_data.get('video')
if not video or 'file' not in video:
self.report_warning('Unable to extract %s version information' % version)
return
f = {
'url': format_url,
'url': video['file'],
}
m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
@@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
})
info_dict['formats'].append(f)
if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex(
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
info_dict['duration'] = parse_duration(video.get('duration'))
extract_format(webpage, 'default')
@@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage):
webpage = self._download_webpage(
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
self._BASE_URL + href, video_id,
'Downloading %s version information' % resolution, fatal=False)
if not webpage:
# Manually report warning because empty page is returned when
# invalid version is requested.

View File

@@ -886,7 +886,7 @@ class InfoExtractor(object):
'url': e.get('contentUrl'),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl'),
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),
@@ -1280,9 +1280,10 @@ class InfoExtractor(object):
}
resolution = last_info.get('RESOLUTION')
if resolution:
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
if mobj:
f['width'] = int(mobj.group('width'))
f['height'] = int(mobj.group('height'))
# Unified Streaming Platform
mobj = re.search(
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
@@ -1702,7 +1703,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
'Bandwidth': representation_attrib.get('bandwidth'),
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
},
'duration': segment_duration,
} for segment_number in range(
@@ -1720,7 +1721,7 @@ class InfoExtractor(object):
def add_segment_url():
segment_url = media_template % {
'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'),
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
'Number': segment_number,
}
representation_ms_info['fragments'].append({

View File

@@ -10,8 +10,8 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': {
@@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
}
}, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
webpage)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
(r'class="title_watch"[^>]*><p>([^<]+)<',
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'),
webpage, 'title')

View File

@@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
url_basename,
)
@@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
except ExtractorError as ee:
@@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id,
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
mp4_url = self._get_video_url(
m3u8_formats_dict = {}
for f in m3u8_formats:
if f.get('height') is not None:
m3u8_formats_dict[f['height']] = f
mp4_data = self._download_json(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
video_id, 'Downloading mp4 JSON')
mp4_url_basename = url_basename(mp4_url)
for m3u8_format in m3u8_formats:
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
if mobj:
http_format = m3u8_format.copy()
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
if not self._is_valid_url(video_url, video_id):
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
video_id, 'Downloading mp4 JSON', fatal=False)
if mp4_data:
for format_id, format_url in mp4_data.get('data', {}).items():
if not isinstance(format_url, compat_str):
continue
http_format.update({
'url': video_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(http_format)
height = int_or_none(format_id)
if height is not None and m3u8_formats_dict.get(height):
f = m3u8_formats_dict[height].copy()
f.update({
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
else:
f = {
'format_id': 'http-%s' % format_id,
'height': int_or_none(format_id),
}
f['url'] = format_url
formats.append(f)
self._sort_formats(formats)

View File

@@ -1,38 +1,117 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
unified_timestamp,
)
class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'id': '10365079',
'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell',
'description': None,
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
},
'params': {
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
'id': '2743663',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
'timestamp': 1449446454,
'upload_date': '20151207',
},
'params': {
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip/_/id/17989860',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
clip = self._download_json(
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
video_id)['videos'][0]
title = clip['headline']
format_urls = set()
formats = []
def traverse_source(source, base_source_id=None):
for source_id, source in source.items():
if isinstance(source, compat_str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
source,
'%s-%s' % (base_source_id, source_id)
if base_source_id else source_id)
def extract_source(source_url, source_id=None):
if source_url in format_urls:
return
format_urls.add(source_url)
ext = determine_ext(source_url)
if ext == 'smil':
formats.extend(self._extract_smil_formats(
source_url, video_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, video_id, f4m_id=source_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=source_id, fatal=False))
else:
formats.append({
'url': source_url,
'format_id': source_id,
})
traverse_source(clip['links']['source'])
self._sort_formats(formats)
description = clip.get('caption') or clip.get('description')
thumbnail = clip.get('thumbnail')
duration = int_or_none(clip.get('duration'))
timestamp = unified_timestamp(clip.get('originalPublishDate'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class ESPNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True,
}, {
@@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id')
cms = 'espn'
if 'data-source="intl"' in webpage:
cms = 'intl'
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
player = self._download_webpage(
player_url, video_id)
pcode = self._search_regex(
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
title = remove_end(
self._og_search_title(webpage),
'- ESPN Video').strip()
return {
'_type': 'url_transparent',
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
'ie_key': 'OoyalaExternal',
'title': title,
}
return self.url_result(
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())

View File

@@ -267,7 +267,10 @@ from .engadget import EngadgetIE
from .eporner import EpornerIE
from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .espn import ESPNIE
from .espn import (
ESPNIE,
ESPNArticleIE,
)
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
@@ -296,6 +299,7 @@ from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
from .fourtube import FourTubeIE
from .fox import FOXIE
from .fox9 import FOX9IE
from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
@@ -1101,6 +1105,7 @@ from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,

View File

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .anvato import AnvatoIE
from ..utils import js_to_json
class FOX9IE(AnvatoIE):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story',
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': {
'id': '314473',
'ext': 'mp4',
'title': 'Bear climbs tree in downtown Duluth',
'description': 'md5:6a36bfb5073a411758a752455408ac90',
'duration': 51,
'timestamp': 1478123580,
'upload_date': '20161102',
'uploader': 'EPFOX',
'categories': ['News', 'Sports'],
'tags': ['news', 'video'],
},
}, {
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._parse_json(
self._search_regex(
r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
webpage, 'anvato playlist'),
video_id, transform_source=js_to_json)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)

View File

@@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
webpage, 'video path')
title = self._og_search_title(webpage)
@@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
webpage, 'upload date', fatal=False))
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',

View File

@@ -47,6 +47,8 @@ from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .vimeo import VimeoIE
from .dailymotion import (
DailymotionIE,
@@ -1634,6 +1636,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'] = self._parse_ism_formats(doc, url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
@@ -1977,11 +1983,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
# Look for embedded PornHub player
pornhub_url = PornHubIE._extract_url(webpage)
if pornhub_url:
return self.url_result(pornhub_url, 'PornHub')
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
@@ -1992,6 +1993,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls:
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
# Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls:
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2449,6 +2465,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
# manifest since there are various streaming URL formats
# possible (see [1]) as well as some other shenanigans like
# .smil/manifest URLs that actually serve an ISM (see [2]) and
# so on.
# Thus the most reasonable way to solve this is to delegate
# to generic extractor in order to look into the contents of
# the manifest itself.
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
entry_info_dict = self.url_result(
smuggle_url(video_url, {'to_generic': True}),
GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url

View File

@@ -1,19 +1,20 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import uuid
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
get_element_by_attribute,
int_or_none,
remove_start,
extract_attributes,
determine_ext,
smuggle_url,
parse_duration,
)
@@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor):
}
class MiTeleIE(MiTeleBaseIE):
class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player'
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
# MD5 is unstable
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
'display_id': 'programa-144',
'id': '57b0dfb9c715da65618b4afa',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE):
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
'add_ie': ['Ooyala'],
}, {
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F',
'display_id': 'programa-226',
'id': '57b0de3dc915da14058b4876',
'ext': 'mp4',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio',
'season': 'Temporada 6',
'episode': 'Programa 226',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312,
'duration': 7313,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}]
def _real_extract(self, url):
display_id = self._match_id(url)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id)
gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable')
appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey')
uid = compat_str(uuid.uuid4())
session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid)
session_json = self._download_json(session_url, video_id, 'Downloading session keys')
sessionKey = compat_str(session_json['sessionKey'])
info = self._get_player_info(url, webpage)
paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey
paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
data_p = (
'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] +
'/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full')
data = self._download_json(data_p, video_id, 'Downloading data JSON')
source = data['hits']['hits'][0]['_source']
embedCode = source['offers'][0]['embed_codes'][0]
title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
webpage, 'title', default=None)
titles = source['localizable_titles'][0]
title = titles.get('title_medium') or titles['title_long']
episode = titles['title_sort_name']
description = titles['summary_long']
titles_series = source['localizable_titles_series'][0]
series = titles_series['title_long']
titles_season = source['localizable_titles_season'][0]
season = titles_season['title_medium']
duration = parse_duration(source['videos'][0]['duration'])
mobj = re.search(r'''(?sx)
class="Destacado-text"[^>]*>.*?<h1>\s*
<span>(?P<series>[^<]+)</span>\s*
<span>(?P<season>[^<]+)</span>\s*
<span>(?P<episode>[^<]+)</span>''', webpage)
series, season, episode = mobj.groups() if mobj else [None] * 3
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
info.update({
'display_id': display_id,
return {
'_type': 'url_transparent',
# for some reason only HLS is supported
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
'id': video_id,
'title': title,
'description': get_element_by_attribute('class', 'text', webpage),
'description': description,
'series': series,
'season': season,
'episode': episode,
})
return info
'duration': duration,
'thumbnail': source['images'][0]['url'],
}

View File

@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
@@ -14,6 +15,25 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
_faked_ip = None
def _download_webpage_handle(self, *args, **kwargs):
# NRK checks X-Forwarded-For HTTP header in order to figure out the
# origin of the client behind proxy. This allows to bypass geo
# restriction by faking this header's value to some Norway IP.
# We will do so once we encounter any geo restriction error.
if self._faked_ip:
# NB: str is intentional
kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
def _fake_ip(self):
# Use fake IP from 37.191.128.0/17 in order to workaround geo
# restriction
def octet(lb=0, ub=255):
return random.randint(lb, ub)
self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -24,6 +44,8 @@ class NRKBaseIE(InfoExtractor):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
entries = []
media_assets = data.get('mediaAssets')
@@ -54,6 +76,7 @@ class NRKBaseIE(InfoExtractor):
'duration': duration,
'subtitles': subtitles,
'formats': formats,
'http_headers': http_headers,
})
if not entries:
@@ -70,10 +93,23 @@ class NRKBaseIE(InfoExtractor):
}]
if not entries:
if data.get('usageRights', {}).get('isGeoBlocked'):
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type and not self._faked_ip:
self.report_warning(
'Video is geo restricted, trying to fake IP')
self._fake_ip()
return self._real_extract(url)
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')

View File

@@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'):
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
@@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({
'domain': domain,
'supportedFormats': 'mp4,rtmp,m3u8,hds',
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
@@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
domain = smuggled_data.get('domain')
supportedformats = smuggled_data.get('supportedformats')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain)
return self._extract(content_tree_url, embed_code, domain, supportedformats)
class OoyalaExternalIE(OoyalaBaseIE):

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals, division
import re
from .common import InfoExtractor
from ..compat import (
compat_chr,
@@ -10,6 +12,10 @@ from ..utils import (
determine_ext,
ExtractorError,
)
from ..jsinterp import (
JSInterpreter,
_NAME_RE
)
class OpenloadIE(InfoExtractor):
@@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor):
'only_matching': True,
}]
def openload_decode(self, txt):
symbol_dict = {
'(゚Д゚) [゚Θ゚]': '_',
'(゚Д゚) [゚ω゚ノ]': 'a',
'(゚Д゚) [゚Θ゚ノ]': 'b',
'(゚Д゚) [\'c\']': 'c',
'(゚Д゚) [゚ー゚ノ]': 'd',
'(゚Д゚) [゚Д゚ノ]': 'e',
'(゚Д゚) [1]': 'f',
'(゚Д゚) [\'o\']': 'o',
'(o゚ー゚o)': 'u',
'(゚Д゚) [\'c\']': 'c',
'((゚ー゚) + (o^_^o))': '7',
'((o^_^o) +(o^_^o) +(c^_^o))': '6',
'((゚ー゚) + (゚Θ゚))': '5',
'(-~3)': '4',
'(-~-~1)': '3',
'(-~1)': '2',
'(-~0)': '1',
'((c^_^o)-(c^_^o))': '0',
}
delim = '(゚Д゚)[゚ε゚]+'
end_token = '(゚Д゚)[゚o゚]'
symbols = '|'.join(map(re.escape, symbol_dict.keys()))
txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
ret = ''
for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
for aachar in aacode.split(delim):
if aachar.isdigit():
ret += compat_chr(int(aachar, 8))
else:
m = re.match(r'^u([\da-f]{4})$', aachar)
if m:
ret += compat_chr(int(m.group(1), 16))
else:
self.report_warning("Cannot decode: %s" % aachar)
return ret
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
@@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor):
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
webpage, 'encrypted data')
magic = compat_ord(enc_data[-1])
enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>',
webpage, 'encrypted code')
js_code = self.openload_decode(enc_code)
jsi = JSInterpreter(js_code)
m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function')
m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
offset = jsi.call_function(m_offset_fun)
diff = jsi.call_function(m_diff_fun)
video_url_chars = []
for idx, c in enumerate(enc_data):
j = compat_ord(c)
if j == magic:
j -= 1
elif j == magic - 1:
j += 1
if j >= 33 and j <= 126:
j = ((j + 14) % 94) + 33
if idx == len(enc_data) - 1:
j += 3
if idx == len(enc_data) - offset:
j += diff
video_url_chars += compat_chr(j)
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)

View File

@@ -8,30 +8,31 @@ from ..utils import int_or_none
class PlaysTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
_TEST = {
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
_VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
_TESTS = [{
'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
'md5': 'dfeac1198506652b5257a62762cec7bc',
'info_dict': {
'id': '56af17f56c95335490',
'ext': 'mp4',
'title': 'When you outplay the Azir wall',
'title': 'Bjergsen - When you outplay the Azir wall',
'description': 'Posted by Bjergsen',
}
}
}, {
'url': 'https://plays.tv/embeds/56af17f56c95335490',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
'https://plays.tv/video/%s' % video_id, video_id)
info = self._search_json_ld(webpage, video_id,)
title = self._og_search_title(webpage)
content = self._parse_json(
self._search_regex(
r'R\.bindContent\(({.+?})\);', webpage,
'content'), video_id)['content']
mpd_url, sources = re.search(
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
content).groups()
webpage).groups()
formats = self._extract_mpd_formats(
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
@@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
})
self._sort_formats(formats)
return {
info.update({
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
'formats': formats,
}
})
return info

View File

@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[0-9a-z]+)
(?P<id>[\da-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
if mobj:
return mobj.group('url')
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(

View File

@@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False))
self._sort_formats(formats)
subtitles = {}
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
if closed_caption_url:
subtitles['fr'] = [{
'url': closed_caption_url,
'ext': determine_ext(closed_caption_url, 'vtt'),
}]
return {
'id': video_id,
'title': get_meta('Title'),
@@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')),
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -10,8 +12,8 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.redtube.com/66418',
'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
@@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
'view_count': int,
'age_limit': 18,
}
}
}, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)

View File

@@ -1,17 +1,24 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
str_or_none,
urlencode_postdata,
clean_html,
)
class ShahidIE(InfoExtractor):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'info_dict': {
@@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
}, {
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
'only_matching': True
}, {
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'only_matching': True
}]
def _call_api(self, path, video_id, note):
data = self._download_json(
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}).get('data', {})
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
e.cause.read().decode('utf-8'), None, fatal=False)
if fail_data:
faults = fail_data.get('faults', [])
faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
if faults_message:
raise ExtractorError(faults_message, expected=True)
raise
self._download_webpage(
'https://shahid.mbc.net/populateContext',
None, 'Populate Context', data=urlencode_postdata({
'firstName': user_data['firstName'],
'lastName': user_data['lastName'],
'userName': user_data['email'],
'csg_user_name': user_data['email'],
'subscriberId': user_data['id'],
'sessionId': user_data['sessionId'],
}))
def _get_api_data(self, response):
data = response.get('data', {})
error = data.get('error')
if error:
@@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
return data
def _real_extract(self, url):
video_id = self._match_id(url)
page_type, video_id = re.match(self._VALID_URL, url).groups()
player = self._call_api(
'Content/Episode/%s' % video_id,
video_id, 'Downloading player JSON')
player = self._get_api_data(self._download_json(
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
video_id, 'Downloading player JSON'))
if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._call_api(
'episode/%s' % video_id, video_id,
'Downloading video JSON')['episode']
video = self._get_api_data(self._download_json(
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
video_id, 'Downloading video JSON', query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}))[page_type]
title = video['title']
categories = [

View File

@@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
_TEST = {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
'md5': 'e482a414a38db73087450e3a6ce69d00',
'md5': '3316ff838ae5bb7f642537825e1e90d2',
'info_dict': {
'id': '0_6snoelag',
'ext': 'mp4',
'ext': 'mov',
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
'timestamp': 1429467813,
'upload_date': '20150419',
'uploader_id': 'batchUser',
}
}
@@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embedded_video_info_str = self._html_search_regex(
r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info')
embedded_video_info = self._parse_json(
embedded_video_info_str, video_id,
transform_source=lambda s: s.replace('\\', ''))
embedded_video_info = self._parse_json(self._html_search_regex(
r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
video_id)
return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])

View File

@@ -15,11 +15,11 @@ from ..utils import (
class TouTvIE(InfoExtractor):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
_access_token = None
_claims = None
_TEST = {
_TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
'info_dict': {
'id': '122017',
@@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor):
'skip_download': True,
},
'skip': '404 Not Found',
}
}, {
'url': 'http://ici.tou.tv/hackers',
'only_matching': True,
}]
def _real_initialize(self):
email, password = self._get_login_info()

View File

@@ -9,7 +9,6 @@ from ..utils import (
int_or_none,
sanitized_Request,
urlencode_postdata,
parse_iso8601,
)
@@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
_NETRC_MACHINE = 'tubitv'
_TEST = {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
'id': '283829',
'ext': 'mp4',
'title': 'The Comedian at The Friday',
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader': 'Indie Rights Films',
'upload_date': '20160111',
'timestamp': 1452555979,
},
'params': {
'skip_download': 'HLS download',
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
}
@@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
video_id = self._match_id(url)
video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
title = video_data['n']
title = video_data['title']
formats = self._extract_m3u8_formats(
video_data['mh'], video_id, 'mp4', 'm3u8_native')
self._proto_relative_url(video_data['url']),
video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
if not thumbnail_url:
continue
thumbnails.append({
'url': self._proto_relative_url(thumbnail_url),
})
subtitles = {}
for sub in video_data.get('sb', []):
sub_url = sub.get('u')
for sub in video_data.get('subtitles', []):
sub_url = sub.get('url')
if not sub_url:
continue
subtitles.setdefault(sub.get('l', 'en'), []).append({
'url': sub_url,
subtitles.setdefault(sub.get('lang', 'English'), []).append({
'url': self._proto_relative_url(sub_url),
})
return {
@@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'thumbnail': video_data.get('ph'),
'description': video_data.get('d'),
'duration': int_or_none(video_data.get('s')),
'timestamp': parse_iso8601(video_data.get('u')),
'uploader': video_data.get('on'),
'thumbnails': thumbnails,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'),
}

View File

@@ -1,12 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import time
import hashlib
import json
from .adobepass import AdobePassIE
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
from ..utils import (
int_or_none,
parse_age_limit,
str_or_none,
parse_duration,
ExtractorError,
extract_attributes,
)
class ViceIE(InfoExtractor):
class ViceBaseIE(AdobePassIE):
def _extract_preplay_video(self, url, webpage):
watch_hub_data = extract_attributes(self._search_regex(
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
title = watch_hub_data['video-title']
query = {}
is_locked = watch_hub_data.get('video-locked') == '1'
if is_locked:
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
exp = int(time.time()) + 14400
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
})
try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
base = video_data['base']
uplynk_preplay_url = preplay['preplayURL']
episode = video_data.get('episode', {})
channel = video_data.get('channel', {})
subtitles = {}
cc_url = preplay.get('ccURL')
if cc_url:
subtitles['en'] = [{
'url': cc_url,
}]
return {
'_type': 'url_transparent',
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
'description': base.get('body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
'timestamp': int_or_none(video_data.get('created_at')),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(watch_hub_data.get('season')),
'season_id': str_or_none(episode.get('season_id')),
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}
class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -21,7 +102,7 @@ class ViceIE(InfoExtractor):
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.vice.com/video/how-to-hack-a-car',
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
@@ -32,6 +113,22 @@ class ViceIE(InfoExtractor):
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
}, {
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'md5': '',
'info_dict': {
'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4',
'uploader': 'Waypoint',
'title': 'The Signal From Tölva',
'uploader_id': '57f7d621e05ca860fa9ccaf9',
'timestamp': 1477941983938,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True,
@@ -42,21 +139,21 @@ class ViceIE(InfoExtractor):
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'only_matching': True,
}]
_PREPLAY_HOST = 'video.vice'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
try:
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage,
'ooyala embed code', default=None)
if embed_code:
return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
youtube_id = self._search_regex(
r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
webpage, urlh = self._download_webpage_handle(url, video_id)
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage,
'ooyala embed code', default=None)
if embed_code:
return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
youtube_id = self._search_regex(
r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
except ExtractorError:
raise ExtractorError('The page doesn\'t contain a video', expected=True)
return self._extract_preplay_video(urlh.geturl(), webpage)
class ViceShowIE(InfoExtractor):

View File

@@ -1,23 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import time
import hashlib
import json
from .adobepass import AdobePassIE
from ..compat import compat_HTTPError
from ..utils import (
int_or_none,
parse_age_limit,
str_or_none,
parse_duration,
ExtractorError,
extract_attributes,
)
from .vice import ViceBaseIE
class VicelandIE(AdobePassIE):
class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = {
'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
@@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE):
},
'add_ie': ['UplynkPreplay'],
}
_PREPLAY_HOST = 'www.viceland'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
watch_hub_data = extract_attributes(self._search_regex(
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
title = watch_hub_data['video-title']
query = {}
if watch_hub_data.get('video-locked') == '1':
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
exp = int(time.time()) + 14400
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
})
try:
preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
base = video_data['base']
uplynk_preplay_url = preplay['preplayURL']
episode = video_data.get('episode', {})
channel = video_data.get('channel', {})
subtitles = {}
cc_url = preplay.get('ccURL')
if cc_url:
subtitles['en'] = [{
'url': cc_url,
}]
return {
'_type': 'url_transparent',
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
'description': base.get('body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
'timestamp': int_or_none(video_data.get('created_at')),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(watch_hub_data.get('season')),
'season_id': str_or_none(episode.get('season_id')),
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}
return self._extract_preplay_video(url, webpage)

View File

@@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode
class VLiveIE(InfoExtractor):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
@@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor):
'creator': "Girl's Day",
'view_count': int,
},
}
}, {
'url': 'http://www.vlive.tv/video/16937',
'info_dict': {
'id': '16937',
'ext': 'mp4',
'title': '[V LIVE] 첸백시 걍방',
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -116,7 +129,7 @@ class VLiveIE(InfoExtractor):
subtitles = {}
for caption in playinfo.get('captions', {}).get('list', []):
lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
if lang and caption.get('source'):
subtitles[lang] = [{
'ext': 'vtt',

View File

@@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
if any(p in webpage for p in (
'>THIS FILE WAS DELETED<',
'>File Not Found<',
'The file you were looking for could not be found, sorry for any inconvenience.<')):
'The file you were looking for could not be found, sorry for any inconvenience.<',
'>The file was removed')):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._hidden_inputs(webpage)

View File

@@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
'id': '1152805',
'ext': 'mp4',
'title': 'sample video (public)',
},
}, {
'url': 'https://view.vzaar.com/27272/player',
'md5': '3b50012ac9bbce7f445550d54e0508f2',
'info_dict': {
'id': '27272',
'ext': 'mp3',
'title': 'MP3',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = {
'id': video_id,
'title': video_data['videoTitle'],
'url': source_url,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
}
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info

View File

@@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
},
'skip': 'redirect to https://www.yahoo.com/music',
},
{
# yahoo://article/
'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': "'True Story' Trailer",
'description': 'True Story',
},
'params': {
'skip_download': True,
},
},
{
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
'info_dict': {
'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
'ext': 'mp4',
'title': '單車天使 - 中文版預',
'description': '中文版預',
},
'params': {
'skip_download': True,
},
},
]
def _real_extract(self, url):
@@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
r'<article[^>]data-uuid=["\']([^"\']+)',
r'yahoo://article/view\?.*\buuid=([^&"\']+)',
r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
]
video_id = self._search_regex(
CONTENT_ID_REGEXES, webpage, 'content ID')

View File

@@ -2175,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE):
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
_VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live'
_VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
IE_NAME = 'youtube:live'
_TESTS = [{
@@ -2204,6 +2204,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/TheYoungTurks/live',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -198,12 +198,12 @@ class JSInterpreter(object):
return opfunc(x, y)
m = re.match(
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
if m:
fname = m.group('func')
argvals = tuple([
int(v) if v.isdigit() else local_vars[v]
for v in m.group('args').split(',')])
for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals)

View File

@@ -103,6 +103,7 @@ class ProxyType(object):
SOCKS4A = 1
SOCKS5 = 2
Proxy = collections.namedtuple('Proxy', (
'type', 'host', 'port', 'username', 'password', 'remote_dns'))

View File

@@ -115,6 +115,8 @@ def _u30(reader):
res = _read_int(reader)
assert res & 0xf0000000 == 0
return res
_u32 = _read_int
@@ -176,6 +178,7 @@ class _Undefined(object):
return 'undefined'
__repr__ = __str__
undefined = _Undefined()

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.11.02'
__version__ = '2016.11.18'