Compare commits
191 commits: 2014.12.13...2015.01.03
Commits in this comparison (abbreviated SHA1s):

19b05d886e, e65566a9cc, baa3c3f0f6, f4f339529c, 7d02fae85b, 6e46c3f1fd, c7e675940c, d26b1317ed,
a221f22969, 817f786fbb, 62420c73cb, 2522a0b7da, 46d32a12c9, c491418526, 823a155293, 324b2c78fa,
d34f98289b, 644096b15c, 15cebcc363, faa4ea68c0, 29a9385ff0, 476eae0c2a, 8399267671, db546cf87f,
317639758a, fdbabca85f, 6f790e5821, 6f5cdeb611, 9eb4f404cb, f58487b392, 5b9aefef77, 772fd5cc44,
50a0f6df7e, 9f435c5f1c, 931e2d1d26, a42419da42, 9a237b776c, 02ec32a1ef, a1e9e6440f, 5878e6398c,
6c6f1408f2, b7a7319c38, 68f705cac5, 079d1dcd80, 7b24bbdf49, f86d543ebb, 60e47a2699, b8bc7a696b,
7d900ef1bf, 1931a73f39, 966ea3aebd, b3013681ff, 416c7fcbce, e83eebb12f, a349873226, fccae2b911,
3ee08848db, 0129b4dd45, 1c57e7f1f4, d0caf3a11e, a87bb090d9, beb95e7781, 5435d7af91, 0c0a70f4c6,
e3947e2b7f, da3f7fb7f8, 429ddfd38d, 479514d015, 355e41466d, 03d9aad87c, 3e2bcf530b, 6343a5f68e,
00de9a9828, 7fc2cd819e, 974739aab5, 0cc4f8e385, 513fd2a872, ae6986fb89, e8e28989eb, 0fa629d05b,
ff7a07d5c4, 5a18403057, 1b7b1d6eac, 23cfa4ae45, e82def52a9, bcfe9db299, cf00ae7640, f9b9e88646,
c2500434c3, f74b341dde, 461b00f34a, 4cda41ac7b, 6a1c4fbfcb, 31424c126f, 53096539dc, 2c0b475235,
a542405200, 3e2b085ef9, 885e4384a1, 2b8f151094, 5ac71f0b27, 39ac7c9435, ed7bdc8a90, 55f0cab3a3,
544dec6298, e0ae1814b1, 9532d72371, 1362bbbb4b, f00fd51dae, a8896c5ac2, 5d3808524d, c8f167823f,
70f6796e7d, 85d253af6b, a86cbf5876, 3f1399de8a, 1f809a8560, 653d14e2f9, 85fab7e47b, 3aa9176f08,
33b53b6021, 3f7421b71b, ee45625290, 2c2a42587b, e2f65efcf9, 081d6e4784, 1d4247f64e, 1ff30d7b79,
16ea817968, a2a4bae929, c58843b3a1, a22524b004, 87c4c21e75, b9465395cb, edf41477f0, 5f627b4448,
60e5428925, 748ec66725, e54a3a2f01, 0e4cb4f406, f7ffe72ac7, cd58dc3e56, c33bcf2051, 7642c08763,
fdc8000810, a91c9b15e3, 27d67ea2ba, d6a8160902, 6e1b9395c6, b1ccbed3d4, 37381350f8, 7af808a5ef,
876bef5937, a16af51873, dc9a441bfa, ee6dfe8308, 2cb5b03e53, 964b190350, 13d27a42cc, ec05fee43a,
b50e3bc67f, ac78b5e97b, 17e0d63957, 9209fe3878, 84d84211ac, b4116dcdd5, bb18d787b5, 0647084f39,
734ea11e3c, 3940450878, ccbfaa83b0, d86007873e, 4b7df0d30c, caff59499c, 99a0f9824a, 3013bbb27d,
6f9b54933f, 1bbe317508, e97a534f13, 8acb83d993, 71b640cc5b, 4f026fafbc, 39f594d660, cae97f6521,
6cbf345f28, a0ab29f8a1, 4a4fbfc967, 408b5839b1, 60620368d7, 4927de4f86, bad5c1a303
AUTHORS (5)

@@ -93,3 +93,8 @@ Zack Fernandes
 cryptonaut
 Adrian Kretz
 Mathias Rav
+Petr Kutalek
+Will Glynn
+Max Reimann
+Cédric Luthi
+Thijs Vermeir
Makefile (16)

@@ -1,4 +1,4 @@
-all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
+all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
@@ -35,13 +35,22 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtu
 	install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
 	install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
 
+codetest:
+	flake8 .
+
 test:
 	#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
 	nosetests --verbose test
+	$(MAKE) codetest
+
+ot: offlinetest
+
+offlinetest: codetest
+	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
 
 tar: youtube-dl.tar.gz
 
-.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion
+.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
 
 pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
 
@@ -59,6 +68,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py
 CONTRIBUTING.md: README.md
 	python devscripts/make_contributing.py README.md CONTRIBUTING.md
 
+supportedsites:
+	python devscripts/make_supportedsites.py docs/supportedsites.md
+
 README.txt: README.md
 	pandoc -f markdown -t plain README.md -o README.txt
 
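In practice the new targets mean `make codetest` runs `flake8 .`, `make offlinetest` (or its `ot` alias) runs the test suite while excluding the network-dependent download, age-restriction, subtitle and annotation tests, and `make supportedsites` regenerates `docs/supportedsites.md`; `all` and `.PHONY` are updated accordingly.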
README.md (68)

@@ -1,7 +1,15 @@
 youtube-dl - download videos from youtube.com or other video platforms
 
-# SYNOPSIS
-**youtube-dl** [OPTIONS] URL [URL...]
+- [INSTALLATION](#installation)
+- [DESCRIPTION](#description)
+- [OPTIONS](#options)
+- [CONFIGURATION](#configuration)
+- [OUTPUT TEMPLATE](#output-template)
+- [VIDEO SELECTION](#video-selection)
+- [FAQ](#faq)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+- [BUGS](#bugs)
+- [COPYRIGHT](#copyright)
 
 # INSTALLATION
 
@@ -34,6 +42,8 @@ YouTube.com and a few more sites. It requires the Python interpreter, version
 your Unix box, on Windows or on Mac OS X. It is released to the public domain,
 which means you can modify it, redistribute it or use it however you like.
 
+    youtube-dl [OPTIONS] URL [URL...]
+
 # OPTIONS
     -h, --help       print this help text and exit
     --version        print program version and exit
@@ -439,6 +449,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
 
 To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 
+### How can I detect whether a given URL is supported by youtube-dl?
+
+For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/abcde ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+
+It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
+
+If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
+
 # DEVELOPER INSTRUCTIONS
 
 Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
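As a sketch of the programmatic check described by the new FAQ entry above (the option names are standard `YoutubeDL` parameters, but the exact exception that propagates out of `extract_info` can vary, so treat this as illustrative rather than canonical):

```python
import youtube_dl
from youtube_dl.utils import DownloadError, UnsupportedError


def is_probably_supported(url):
    # Probe the URL without downloading anything.
    with youtube_dl.YoutubeDL({'quiet': True, 'simulate': True}) as ydl:
        try:
            ydl.extract_info(url, download=False)
            return True
        except (UnsupportedError, DownloadError):
            # Unsupported URL, or extraction failed for another reason.
            return False
```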
@@ -529,14 +547,52 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
|
|||||||
|
|
||||||
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
||||||
|
|
||||||
import youtube_dl
|
```python
|
||||||
|
import youtube_dl
|
||||||
|
|
||||||
ydl_opts = {}
|
ydl_opts = {}
|
||||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||||
|
```
|
||||||
|
|
||||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||||
|
|
||||||
|
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import youtube_dl
|
||||||
|
|
||||||
|
|
||||||
|
class MyLogger(object):
|
||||||
|
def debug(self, msg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def warning(self, msg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def error(self, msg):
|
||||||
|
print(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def my_hook(d):
|
||||||
|
if d['status'] == 'finished':
|
||||||
|
print('Done downloading, now converting ...')
|
||||||
|
|
||||||
|
|
||||||
|
ydl_opts = {
|
||||||
|
'format': 'bestaudio/best',
|
||||||
|
'postprocessors': [{
|
||||||
|
'key': 'FFmpegExtractAudio',
|
||||||
|
'preferredcodec': 'mp3',
|
||||||
|
'preferredquality': '192',
|
||||||
|
}],
|
||||||
|
'logger': MyLogger(),
|
||||||
|
'progress_hooks': [my_hook],
|
||||||
|
}
|
||||||
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||||
|
```
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||||
devscripts/make_contributing.py

@@ -1,20 +1,20 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 
-import argparse
 import io
+import optparse
 import re
 
 
 def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        'INFILE', help='README.md file name to read from')
-    parser.add_argument(
-        'OUTFILE', help='CONTRIBUTING.md file name to write to')
-    args = parser.parse_args()
+    parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
+    options, args = parser.parse_args()
+    if len(args) != 2:
+        parser.error('Expected an input and an output filename')
 
-    with io.open(args.INFILE, encoding='utf-8') as inf:
+    infile, outfile = args
+
+    with io.open(infile, encoding='utf-8') as inf:
         readme = inf.read()
 
     bug_text = re.search(
@@ -25,7 +25,7 @@ def main():
 
     out = bug_text + dev_text
 
-    with io.open(args.OUTFILE, 'w', encoding='utf-8') as outf:
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
         outf.write(out)
 
 if __name__ == '__main__':
devscripts/make_supportedsites.py (new file, 45 lines)

@@ -0,0 +1,45 @@
#!/usr/bin/env python
from __future__ import unicode_literals

import io
import optparse
import os
import sys


# Import youtube_dl
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(ROOT_DIR)
import youtube_dl


def main():
    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('Expected an output filename')

    outfile, = args

    def gen_ies_md(ies):
        for ie in ies:
            ie_md = '**{}**'.format(ie.IE_NAME)
            ie_desc = getattr(ie, 'IE_DESC', None)
            if ie_desc is False:
                continue
            if ie_desc is not None:
                ie_md += ': {}'.format(ie.IE_DESC)
            if not ie.working():
                ie_md += ' (Currently broken)'
            yield ie_md

    ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
    out = '# Supported sites\n' + ''.join(
        ' - ' + md + '\n'
        for md in gen_ies_md(ies))

    with io.open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(out)


if __name__ == '__main__':
    main()
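The Makefile's new `supportedsites` target drives this script as `python devscripts/make_supportedsites.py docs/supportedsites.md`, producing the Markdown list that appears as a new file further down in this diff.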
@@ -11,8 +11,19 @@ README_FILE = os.path.join(ROOT_DIR, 'README.md')
 with io.open(README_FILE, encoding='utf-8') as f:
     readme = f.read()
 
-PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n'
-readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
+PREFIX = '''%YOUTUBE-DL(1)
+
+# NAME
+
+youtube\-dl \- download videos from youtube.com or other video platforms
+
+# SYNOPSIS
+
+**youtube-dl** \[OPTIONS\] URL [URL...]
+
+'''
+readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
+readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
 readme = PREFIX + readme
 
 if sys.version_info < (3, 0):
docs/supportedsites.md (new file, 500 lines)

@@ -0,0 +1,500 @@
# Supported sites
 - **1up.com**
 - **220.ro**
 - **24video**
 - **3sat**
 - **4tube**
 - **56.com**
 - **5min**
 - **8tracks**
 - **9gag**
 - **abc.net.au**
 - **AcademicEarth:Course**
 - **AddAnime**
 - **AdobeTV**
 - **AdultSwim**
 - **Aftonbladet**
 - **AlJazeera**
 - **Allocine**
 - **anitube.se**
 - **AnySex**
 - **Aparat**
 - **AppleTrailers**
 - **archive.org**: archive.org videos
 - **ARD**
 - **ARD:mediathek**
 - **arte.tv**
 - **arte.tv:+7**
 - **arte.tv:concert**
 - **arte.tv:creative**
 - **arte.tv:ddc**
 - **arte.tv:embed**
 - **arte.tv:future**
 - **audiomack**
 - **AUEngine**
 - **Azubu**
 - **bambuser**
 - **bambuser:channel**
 - **Bandcamp**
 - **Bandcamp:album**
 - **bbc.co.uk**: BBC iPlayer
 - **Beeg**
 - **BehindKink**
 - **Bet**
 - **Bild**: Bild.de
 - **BiliBili**
 - **blinkx**
 - **blip.tv:user**
 - **BlipTV**
 - **Bloomberg**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
 - **Break**
 - **Brightcove**
 - **BuzzFeed**
 - **BYUtv**
 - **Canal13cl**
 - **canalc2.tv**
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 - **CBS**
 - **CBSNews**: CBS News
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chilloutzone**
 - **Cinchcast**
 - **Cinemassacre**
 - **clipfish**
 - **cliphunter**
 - **Clipsyndicate**
 - **Cloudy**
 - **Clubic**
 - **cmt.com**
 - **CNET**
 - **CNN**
 - **CNNBlogs**
 - **CollegeHumor**
 - **ComCarCoff**
 - **ComedyCentral**
 - **ComedyCentralShows**: The Daily Show / The Colbert Report
 - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
 - **Cracked**
 - **Criterion**
 - **Crunchyroll**
 - **crunchyroll:playlist**
 - **CSpan**: C-SPAN
 - **culturebox.francetvinfo.fr**
 - **dailymotion**
 - **dailymotion:playlist**
 - **dailymotion:user**
 - **daum.net**
 - **DBTV**
 - **DeezerPlaylist**
 - **defense.gouv.fr**
 - **Discovery**
 - **divxstage**: DivxStage
 - **Dotsub**
 - **Dropbox**
 - **DrTuber**
 - **DRTV**
 - **Dump**
 - **dvtv**: http://video.aktualne.cz/
 - **EbaumsWorld**
 - **eHow**
 - **Einthusan**
 - **eitb.tv**
 - **EllenTV**
 - **EllenTV:clips**
 - **ElPais**: El País
 - **EMPFlix**
 - **Engadget**
 - **Eporner**
 - **Escapist**
 - **EveryonesMixtape**
 - **exfm**: ex.fm
 - **ExpoTV**
 - **ExtremeTube**
 - **facebook**
 - **faz.net**
 - **fc2**
 - **fernsehkritik.tv**
 - **fernsehkritik.tv:postecke**
 - **Firedrive**
 - **Firstpost**
 - **firsttv**: Видеоархив - Первый канал
 - **Flickr**
 - **Folketinget**: Folketinget (ft.dk; Danish parliament)
 - **Foxgay**
 - **FoxNews**
 - **france2.fr:generation-quoi**
 - **FranceCulture**
 - **FranceInter**
 - **francetv**: France 2, 3, 4, 5 and Ô
 - **francetvinfo.fr**
 - **Freesound**
 - **freespeech.org**
 - **FreeVideo**
 - **FunnyOrDie**
 - **Gamekings**
 - **GameOne**
 - **gameone:playlist**
 - **GameSpot**
 - **GameStar**
 - **Gametrailers**
 - **GDCVault**
 - **generic**: Generic downloader that works on some sites
 - **GiantBomb**
 - **Glide**: Glide mobile video messages (glide.me)
 - **Globo**
 - **GodTube**
 - **GoldenMoustache**
 - **Golem**
 - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
 - **Goshgay**
 - **Grooveshark**
 - **Groupon**
 - **Hark**
 - **Heise**
 - **Helsinki**: helsinki.fi
 - **HentaiStigma**
 - **HornBunny**
 - **HostingBulk**
 - **HotNewHipHop**
 - **Howcast**
 - **HowStuffWorks**
 - **HuffPost**: Huffington Post
 - **Hypem**
 - **Iconosquare**
 - **ign.com**
 - **imdb**: Internet Movie Database trailers
 - **imdb:list**: Internet Movie Database lists
 - **Ina**
 - **InfoQ**
 - **Instagram**
 - **instagram:user**: Instagram user profile
 - **InternetVideoArchive**
 - **IPrima**
 - **ivi**: ivi.ru
 - **ivi:compilation**: ivi.ru compilations
 - **Izlesene**
 - **JadoreCettePub**
 - **JeuxVideo**
 - **Jove**
 - **jpopsuki.tv**
 - **Jukebox**
 - **Kankan**
 - **keek**
 - **KeezMovies**
 - **KhanAcademy**
 - **KickStarter**
 - **kontrtube**: KontrTube.ru - Труба зовёт
 - **KrasView**: Красвью
 - **Ku6**
 - **la7.tv**
 - **Laola1Tv**
 - **lifenews**: LIFE | NEWS
 - **LiveLeak**
 - **livestream**
 - **livestream:original**
 - **lrt.lt**
 - **lynda**: lynda.com videos
 - **lynda:course**: lynda.com online courses
 - **m6**
 - **macgamestore**: MacGameStore trailers
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
 - **Minhateca**
 - **MinistryGrid**
 - **mitele.es**
 - **mixcloud**
 - **MLB**
 - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
 - **Mofosex**
 - **Mojvideo**
 - **Moniker**: allmyvideos.net and vidspot.net
 - **mooshare**: Mooshare.biz
 - **Morningstar**: morningstar.com
 - **Motherless**
 - **Motorsport**: motorsport.com
 - **MovieClips**
 - **Moviezine**
 - **movshare**: MovShare
 - **MPORA**
 - **MTV**
 - **mtviggy.com**
 - **mtvservices:embedded**
 - **MuenchenTV**: münchen.tv
 - **MusicPlayOn**
 - **MusicVault**
 - **muzu.tv**
 - **MySpace**
 - **MySpace:album**
 - **MySpass**
 - **myvideo**
 - **MyVidster**
 - **Naver**
 - **NBA**
 - **NBC**
 - **NBCNews**
 - **ndr**: NDR.de - Mediathek
 - **NDTV**
 - **NerdCubedFeed**
 - **Newgrounds**
 - **Newstube**
 - **nfb**: National Film Board of Canada
 - **nfl.com**
 - **nhl.com**
 - **nhl.com:videocenter**: NHL videocenter category
 - **niconico**: ニコニコ動画
 - **NiconicoPlaylist**
 - **Noco**
 - **Normalboots**
 - **NosVideo**
 - **novamov**: NovaMov
 - **Nowness**
 - **nowvideo**: NowVideo
 - **npo.nl**
 - **NRK**
 - **NRKTV**
 - **NTV**
 - **Nuvid**
 - **NYTimes**
 - **ocw.mit.edu**
 - **OktoberfestTV**
 - **on.aol.com**
 - **Ooyala**
 - **orf:oe1**: Radio Österreich 1
 - **orf:tvthek**: ORF TVthek
 - **ORFFM4**: radio FM4
 - **parliamentlive.tv**: UK parliament videos
 - **Patreon**
 - **PBS**
 - **Phoenix**
 - **Photobucket**
 - **PlanetaPlay**
 - **play.fm**
 - **played.to**
 - **Playvid**
 - **plus.google**: Google Plus
 - **pluzz.francetv.fr**
 - **podomatic**
 - **PornHd**
 - **PornHub**
 - **Pornotube**
 - **PornoXO**
 - **PromptFile**
 - **prosiebensat1**: ProSiebenSat.1 Digital
 - **Pyvideo**
 - **QuickVid**
 - **radio.de**
 - **radiofrance**
 - **Rai**
 - **RBMARadio**
 - **RedTube**
 - **Restudy**
 - **ReverbNation**
 - **RingTV**
 - **RottenTomatoes**
 - **Roxwel**
 - **RTBF**
 - **RTLnow**
 - **rtlxl.nl**
 - **RTP**
 - **RTS**: RTS.ch
 - **rtve.es:alacarta**: RTVE a la carta
 - **rtve.es:live**: RTVE.es live streams
 - **RUHD**
 - **rutube**: Rutube videos
 - **rutube:channel**: Rutube channels
 - **rutube:movie**: Rutube movies
 - **rutube:person**: Rutube person videos
 - **RUTV**: RUTV.RU
 - **Sapo**: SAPO Vídeos
 - **savefrom.net**
 - **SBS**: sbs.com.au
 - **SciVee**
 - **screen.yahoo:search**: Yahoo screen search
 - **Screencast**
 - **ScreencastOMatic**
 - **ScreenwaveMedia**
 - **ServingSys**
 - **Sexu**
 - **SexyKarma**: Sexy Karma and Watch Indian Porn
 - **Shared**
 - **ShareSix**
 - **Sina**
 - **Slideshare**
 - **Slutload**
 - **smotri**: Smotri.com
 - **smotri:broadcast**: Smotri.com broadcasts
 - **smotri:community**: Smotri.com community videos
 - **smotri:user**: Smotri.com user videos
 - **Snotr**
 - **Sockshare**
 - **Sohu**
 - **soundcloud**
 - **soundcloud:playlist**
 - **soundcloud:set**
 - **soundcloud:user**
 - **Soundgasm**
 - **southpark.cc.com**
 - **southpark.de**
 - **Space**
 - **Spankwire**
 - **Spiegel**
 - **Spiegel:Article**: Articles on spiegel.de
 - **Spiegeltv**
 - **Spike**
 - **Sport5**
 - **SportBox**
 - **SportDeutschland**
 - **SRMediathek**: Süddeutscher Rundfunk
 - **stanfordoc**: Stanford Open ClassRoom
 - **Steam**
 - **streamcloud.eu**
 - **StreamCZ**
 - **SunPorno**
 - **SWRMediathek**
 - **Syfy**
 - **SztvHu**
 - **Tagesschau**
 - **Tapely**
 - **Tass**
 - **teachertube**: teachertube.com videos
 - **teachertube:user:collection**: teachertube.com user and collection videos
 - **TeachingChannel**
 - **Teamcoco**
 - **TeamFour**
 - **TechTalks**
 - **techtv.mit.edu**
 - **TED**
 - **tegenlicht.vpro.nl**
 - **TeleBruxelles**
 - **telecinco.es**
 - **TeleMB**
 - **TenPlay**
 - **TF1**
 - **TheOnion**
 - **ThePlatform**
 - **TheSixtyOne**
 - **ThisAV**
 - **THVideo**
 - **THVideoPlaylist**
 - **tinypic**: tinypic.com videos
 - **tlc.com**
 - **tlc.de**
 - **TMZ**
 - **TNAFlix**
 - **tou.tv**
 - **Toypics**: Toypics user profile
 - **ToypicsUser**: Toypics user profile
 - **TrailerAddict** (Currently broken)
 - **Trilulilu**
 - **TruTube**
 - **Tube8**
 - **Tudou**
 - **Tumblr**
 - **TuneIn**
 - **Turbo**
 - **Tutv**
 - **tv.dfb.de**
 - **tvigle**: Интернет-телевидение Tvigle.ru
 - **tvp.pl**
 - **TVPlay**: TV3Play and related services
 - **Twitch**
 - **Ubu**
 - **udemy**
 - **udemy:course**
 - **Unistra**
 - **Urort**: NRK P3 Urørt
 - **ustream**
 - **ustream:channel**
 - **Vbox7**
 - **VeeHD**
 - **Veoh**
 - **Vesti**: Вести.Ru
 - **Vevo**
 - **VGTV**
 - **vh1.com**
 - **Vice**
 - **Viddler**
 - **video.google:search**: Google Video search
 - **video.mit.edu**
 - **VideoBam**
 - **VideoDetective**
 - **videofy.me**
 - **videolectures.net**
 - **VideoMega**
 - **VideoPremium**
 - **VideoTt**: video.tt - Your True Tube
 - **videoweed**: VideoWeed
 - **Vidme**
 - **Vidzi**
 - **viki**
 - **vimeo**
 - **vimeo:album**
 - **vimeo:channel**
 - **vimeo:group**
 - **vimeo:likes**: Vimeo user likes
 - **vimeo:review**: Review pages on vimeo
 - **vimeo:user**
 - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
 - **Vimple**: Vimple.ru
 - **Vine**
 - **vine:user**
 - **vk.com**
 - **vk.com:user-videos**: vk.com:All of a user's videos
 - **Vodlocker**
 - **Vporn**
 - **VRT**
 - **vube**: Vube.com
 - **VuClip**
 - **vulture.com**
 - **Walla**
 - **WashingtonPost**
 - **wat.tv**
 - **WayOfTheMaster**
 - **WDR**
 - **wdr:mobile**
 - **WDRMaus**: Sendung mit der Maus
 - **Weibo**
 - **Wimp**
 - **Wistia**
 - **WorldStarHipHop**
 - **wrzuta.pl**
 - **XBef**
 - **XboxClips**
 - **XHamster**
 - **XMinus**
 - **XNXX**
 - **XTube**
 - **XTubeUser**: XTube user profile
 - **XVideos**
 - **Yahoo**: Yahoo screen and movies
 - **YesJapan**
 - **Ynet**
 - **YouJizz**
 - **Youku**
 - **YouPorn**
 - **YourUpload**
 - **youtube**: YouTube.com
 - **youtube:channel**: YouTube.com channels
 - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
 - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
 - **youtube:playlist**: YouTube.com playlists
 - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
 - **youtube:search**: YouTube.com searches
 - **youtube:search:date**: YouTube.com searches, newest videos first
 - **youtube:search_url**: YouTube.com search URLs
 - **youtube:show**: YouTube.com (multi-season) shows
 - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
 - **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
 - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
 - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
 - **ZDF**
 - **ZDFChannel**
 - **zingmp3:album**: mp3.zing.vn albums
 - **zingmp3:song**: mp3.zing.vn songs
@@ -1,2 +1,6 @@
 [wheel]
 universal = True
+
+[flake8]
+exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
+ignore = E501
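This `[flake8]` block is what the `flake8 .` run behind the new `codetest` target picks up: generated and build files are excluded, and long-line warnings (E501) are ignored.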
@@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False):
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 
-def expect_info_dict(self, expected_dict, got_dict):
+def expect_info_dict(self, got_dict, expected_dict):
     for info_field, expected in expected_dict.items():
         if isinstance(expected, compat_str) and expected.startswith('re:'):
             got = got_dict.get(info_field)
@@ -155,7 +155,7 @@ def generator(test_case):
         if is_playlist:
             self.assertEqual(res_dict['_type'], 'playlist')
             self.assertTrue('entries' in res_dict)
-            expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+            expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
         if 'playlist_mincount' in test_case:
             assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
 
-                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
+                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
             finally:
                 try_rm_tcs_files()
                 if is_playlist and res_dict is not None and res_dict.get('entries'):
@@ -88,6 +88,14 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         subtitles = self.getSubtitles()
         self.assertTrue(subtitles['it'] is not None)
 
+    def test_youtube_translated_subtitles(self):
+        # This video has a subtitles track, which can be translated
+        self.url = 'Ky9eprVWzlI'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+
     def test_youtube_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
         self.url = 'n5BB19UTcdA'
@@ -7,9 +7,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import io
-import os
 import re
-import unittest
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -16,39 +16,40 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    args_to_str,
     clean_html,
     DateRange,
+    detect_exe_version,
     encodeFilename,
+    escape_rfc3986,
+    escape_url,
     find_xpath_attr,
     fix_xml_ampersands,
-    orderedSet,
-    OnDemandPagedList,
     InAdvancePagedList,
+    intlist_to_bytes,
+    js_to_json,
+    limit_length,
+    OnDemandPagedList,
+    orderedSet,
     parse_duration,
+    parse_filesize,
+    parse_iso8601,
     read_batch_urls,
     sanitize_filename,
     shell_quote,
     smuggle_url,
     str_to_int,
+    strip_jsonp,
     struct_unpack,
     timeconvert,
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    uppercase_escape,
     url_basename,
     urlencode_postdata,
-    xpath_with_ns,
-    parse_iso8601,
-    strip_jsonp,
-    uppercase_escape,
-    limit_length,
-    escape_rfc3986,
-    escape_url,
-    js_to_json,
-    intlist_to_bytes,
-    args_to_str,
-    parse_filesize,
     version_tuple,
+    xpath_with_ns,
 )
 
 
@@ -390,5 +391,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
         self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style
 
+    def test_detect_exe_version(self):
+        self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
+built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
+configuration: --prefix=/usr --extra-'''), '1.2.1')
+        self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685
+built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685')
+        self.assertEqual(detect_exe_version('''X server found. dri2 connection failed!
+Trying to open render node...
+Success at /dev/dri/renderD128.
+ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
+
 if __name__ == '__main__':
     unittest.main()
@@ -1,76 +0,0 @@ (file removed)
-#!/usr/bin/env python
-# coding: utf-8
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import get_params
-
-
-import io
-import json
-
-import youtube_dl.YoutubeDL
-import youtube_dl.extractor
-
-
-class YoutubeDL(youtube_dl.YoutubeDL):
-    def __init__(self, *args, **kwargs):
-        super(YoutubeDL, self).__init__(*args, **kwargs)
-        self.to_stderr = self.to_screen
-
-params = get_params({
-    'writeinfojson': True,
-    'skip_download': True,
-    'writedescription': True,
-})
-
-
-TEST_ID = 'BaW_jenozKc'
-INFO_JSON_FILE = TEST_ID + '.info.json'
-DESCRIPTION_FILE = TEST_ID + '.mp4.description'
-EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
-test URL: https://github.com/rg3/youtube-dl/issues/1892
-
-This is a test video for youtube-dl.
-
-For more information, contact phihag@phihag.de .'''
-
-
-class TestInfoJSON(unittest.TestCase):
-    def setUp(self):
-        # Clear old files
-        self.tearDown()
-
-    def test_info_json(self):
-        ie = youtube_dl.extractor.YoutubeIE()
-        ydl = YoutubeDL(params)
-        ydl.add_info_extractor(ie)
-        ydl.download([TEST_ID])
-        self.assertTrue(os.path.exists(INFO_JSON_FILE))
-        with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
-            jd = json.load(jsonf)
-        self.assertEqual(jd['upload_date'], '20121002')
-        self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
-        self.assertEqual(jd['id'], TEST_ID)
-        self.assertEqual(jd['extractor'], 'youtube')
-        self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
-        self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
-
-        self.assertTrue(os.path.exists(DESCRIPTION_FILE))
-        with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf:
-            descr = descf.read()
-        self.assertEqual(descr, EXPECTED_DESCRIPTION)
-
-    def tearDown(self):
-        if os.path.exists(INFO_JSON_FILE):
-            os.remove(INFO_JSON_FILE)
-        if os.path.exists(DESCRIPTION_FILE):
-            os.remove(DESCRIPTION_FILE)
-
-
-if __name__ == '__main__':
-    unittest.main()
youtube_dl/YoutubeDL.py

@@ -27,6 +27,7 @@ from .compat import (
     compat_cookiejar,
     compat_expanduser,
     compat_http_client,
+    compat_kwargs,
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
@@ -67,7 +68,11 @@ from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
-from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
+from .postprocessor import (
+    FFmpegMergerPP,
+    FFmpegPostProcessor,
+    get_postprocessor,
+)
 from .version import __version__
 
 
@@ -116,7 +121,7 @@ class YoutubeDL(object):
     dump_single_json:  Force printing the info_dict of the whole playlist
                        (or video) as a single JSON line.
     simulate:          Do not download the video files.
-    format:            Video format code.
+    format:            Video format code. See options.py for more information.
     format_limit:      Highest quality format to try.
     outtmpl:           Template for output names.
     restrictfilenames: Do not allow "&" and spaces in file names
@@ -176,6 +181,28 @@ class YoutubeDL(object):
     extract_flat:      Do not resolve URLs, return the immediate result.
                        Pass in 'in_playlist' to only show this behavior for
                        playlist items.
+    postprocessors:    A list of dictionaries, each with an entry
+                       * key:  The name of the postprocessor. See
+                               youtube_dl/postprocessor/__init__.py for a list.
+                       as well as any further keyword arguments for the
+                       postprocessor.
+    progress_hooks:    A list of functions that get called on download
+                       progress, with a dictionary with the entries
+                       * filename: The final filename
+                       * status: One of "downloading" and "finished"
+
+                       The dict may also have some of the following entries:
+
+                       * downloaded_bytes: Bytes on disk
+                       * total_bytes: Size of the whole file, None if unknown
+                       * tmpfilename: The filename we're currently writing to
+                       * eta: The estimated time in seconds, None if unknown
+                       * speed: The download speed in bytes/second, None if
+                                unknown
+
+                       Progress hooks are guaranteed to be called at least once
+                       (with status "finished") if the download is successful.
+
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
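To make the two new parameters concrete, a minimal options dict following the docstring above might look like this; the postprocessor key and arguments mirror the `FFmpegExtractAudio` example used elsewhere in this changeset, while the hook body itself is illustrative:

```python
def my_hook(d):
    # Called repeatedly while downloading; d['status'] is 'downloading'
    # or 'finished', as described in the docstring above.
    if d['status'] == 'finished':
        print('Downloaded %s' % d['filename'])

ydl_opts = {
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',   # name resolved via get_postprocessor()
        'preferredcodec': 'mp3',       # remaining entries become keyword arguments
    }],
    'progress_hooks': [my_hook],
}
```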
@@ -256,6 +283,16 @@ class YoutubeDL(object):
         self.print_debug_header()
         self.add_default_info_extractors()
 
+        for pp_def_raw in self.params.get('postprocessors', []):
+            pp_class = get_postprocessor(pp_def_raw['key'])
+            pp_def = dict(pp_def_raw)
+            del pp_def['key']
+            pp = pp_class(self, **compat_kwargs(pp_def))
+            self.add_post_processor(pp)
+
+        for ph in self.params.get('progress_hooks', []):
+            self.add_progress_hook(ph)
+
     def warn_if_short_id(self, argv):
         # short YouTube ID starting with dash?
         idxs = [
@@ -675,7 +712,7 @@ class YoutubeDL(object):
             entries = entries[::-1]
 
         for i, entry in enumerate(entries, 1):
-            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
+            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
             extra = {
                 'n_entries': n_entries,
                 'playlist': playlist,
@@ -979,13 +1016,13 @@ class YoutubeDL(object):
             descfn = filename + '.description'
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                 self.to_screen('[info] Video description is already present')
+            elif info_dict.get('description') is None:
+                self.report_warning('There\'s no description to write.')
             else:
                 try:
                     self.to_screen('[info] Writing video description to: ' + descfn)
                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                         descfile.write(info_dict['description'])
-                except (KeyError, TypeError):
-                    self.report_warning('There\'s no description to write.')
                 except (OSError, IOError):
                     self.report_error('Cannot write description file ' + descfn)
                     return
@@ -1112,8 +1149,7 @@ class YoutubeDL(object):
             except (PostProcessingError) as err:
                 self.report_error('postprocessing: %s' % str(err))
                 return
-
-        self.record_download_archive(info_dict)
+        self.record_download_archive(info_dict)
 
     def download(self, url_list):
         """Download a given list of URLs."""
@@ -1297,7 +1333,9 @@ class YoutubeDL(object):
         formats = info_dict.get('formats', [info_dict])
         idlen = max(len('format code'),
                     max(len(f['format_id']) for f in formats))
-        formats_s = [line(f, idlen) for f in formats]
+        formats_s = [
+            line(f, idlen) for f in formats
+            if f.get('preference') is None or f['preference'] >= -1000]
         if len(formats) > 1:
             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
youtube_dl/__init__.py

@@ -40,16 +40,6 @@ from .downloader import (
 )
 from .extractor import gen_extractors
 from .YoutubeDL import YoutubeDL
-from .postprocessor import (
-    AtomicParsleyPP,
-    FFmpegAudioFixPP,
-    FFmpegMetadataPP,
-    FFmpegVideoConvertor,
-    FFmpegExtractAudioPP,
-    FFmpegEmbedSubtitlePP,
-    XAttrMetadataPP,
-    ExecAfterDownloadPP,
-)
 
 
 def _real_main(argv=None):
@@ -122,7 +112,7 @@ def _real_main(argv=None):
             if desc is False:
                 continue
             if hasattr(ie, 'SEARCH_KEY'):
-                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
+                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                 _COUNTS = ('', '5', '10', 'all')
                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
             compat_print(desc)
@@ -212,6 +202,43 @@ def _real_main(argv=None):
     any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
     download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
+    # PostProcessors
+    postprocessors = []
+    # Add the metadata pp first, the other pps will copy it
+    if opts.addmetadata:
+        postprocessors.append({'key': 'FFmpegMetadata'})
+    if opts.extractaudio:
+        postprocessors.append({
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': opts.audioformat,
+            'preferredquality': opts.audioquality,
+            'nopostoverwrites': opts.nopostoverwrites,
+        })
+    if opts.recodevideo:
+        postprocessors.append({
+            'key': 'FFmpegVideoConvertor',
+            'preferedformat': opts.recodevideo,
+        })
+    if opts.embedsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegEmbedSubtitle',
+            'subtitlesformat': opts.subtitlesformat,
+        })
+    if opts.xattrs:
+        postprocessors.append({'key': 'XAttrMetadata'})
+    if opts.embedthumbnail:
+        if not opts.addmetadata:
+            postprocessors.append({'key': 'FFmpegAudioFix'})
+        postprocessors.append({'key': 'AtomicParsley'})
+    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
+    # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
+    if opts.exec_cmd:
+        postprocessors.append({
+            'key': 'ExecAfterDownload',
+            'verboseOutput': opts.verbose,
+            'exec_cmd': opts.exec_cmd,
+        })
+
     ydl_opts = {
         'usenetrc': opts.usenetrc,
         'username': opts.username,
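The net effect of this block is that the command-line front end no longer instantiates postprocessor classes itself: it only builds the declarative `postprocessors` list (each entry a dict with a `key` plus keyword arguments) and hands it to `YoutubeDL` through `ydl_opts`, where the loop added in `YoutubeDL.__init__` resolves each key via `get_postprocessor` and constructs the object.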
@@ -297,32 +324,10 @@ def _real_main(argv=None):
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
         'extract_flat': opts.extract_flat,
+        'postprocessors': postprocessors,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
-        # PostProcessors
-        # Add the metadata pp first, the other pps will copy it
-        if opts.addmetadata:
-            ydl.add_post_processor(FFmpegMetadataPP())
-        if opts.extractaudio:
-            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
-        if opts.recodevideo:
-            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
-        if opts.embedsubtitles:
-            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
-        if opts.xattrs:
-            ydl.add_post_processor(XAttrMetadataPP())
-        if opts.embedthumbnail:
-            if not opts.addmetadata:
-                ydl.add_post_processor(FFmpegAudioFixPP())
-            ydl.add_post_processor(AtomicParsleyPP())
-
-        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
-        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
-        if opts.exec_cmd:
-            ydl.add_post_processor(ExecAfterDownloadPP(
-                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
-
         # Update version
         if opts.update_self:
             update_self(ydl.to_screen, opts.verbose)
@@ -285,7 +285,7 @@ class FileDownloader(object):
         Return True on success and False otherwise
         """
         # Check file already present
-        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
+        if filename != '-' and self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
             self.report_file_already_downloaded(filename)
             self._hook_progress({
                 'filename': filename,
@@ -305,19 +305,6 @@ class FileDownloader(object):
             ph(status)

     def add_progress_hook(self, ph):
-        """ ph gets called on download progress, with a dictionary with the entries
-        * filename: The final filename
-        * status: One of "downloading" and "finished"
-
-        It can also have some of the following entries:
-
-        * downloaded_bytes: Bytes on disks
-        * total_bytes: Total bytes, None if unknown
-        * tmpfilename: The filename we're currently writing to
-        * eta: The estimated time in seconds, None if unknown
-        * speed: The download speed in bytes/second, None if unknown
-
-        Hooks are guaranteed to be called at least once (with status "finished")
-        if the download is successful.
-        """
+        # See YoutubeDl.py (search for progress_hooks) for a description of
+        # this interface
         self._progress_hooks.append(ph)
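The hook interface that the removed docstring described (a dict with at least 'filename' and 'status', plus the optional byte/ETA/speed fields listed above) is unchanged; only the documentation moves to YoutubeDL.py. A rough illustration of such a hook, registered through the public 'progress_hooks' option and assuming only the fields named in the removed docstring:

# sketch: a progress hook receiving the dict described above
def hook(d):
    if d['status'] == 'downloading':
        print('downloading %s: %s of %s bytes' % (
            d['filename'], d.get('downloaded_bytes'), d.get('total_bytes')))
    elif d['status'] == 'finished':
        print('done: %s' % d['filename'])

ydl_opts = {'progress_hooks': [hook]}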
@@ -203,7 +203,7 @@ def write_flv_header(stream, metadata):
     stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
     stream.write(metadata)
     # Magic numbers extracted from the output files produced by AdobeHDS.php
-    #(https://github.com/K-S-V/Scripts)
+    # (https://github.com/K-S-V/Scripts)
     stream.write(b'\x00\x00\x01\x73')
@@ -6,6 +6,7 @@ import subprocess
 from .common import FileDownloader
 from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
+    check_executable,
     encodeFilename,
 )

@@ -20,11 +21,7 @@ class MplayerFD(FileDownloader):
             'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
             '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
-        try:
-            subprocess.call(
-                ['mplayer', '-h'],
-                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
+        if not check_executable('mplayer', ['-h']):
             self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
             return False
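check_executable is the shared helper this hunk switches to; the inline try/except it replaces suggests behaviour roughly like the sketch below. This is an approximation written for illustration, not the exact youtube_dl.utils implementation:

# sketch: approximate behaviour of utils.check_executable(exe, args)
import subprocess

def check_executable(exe, args=[]):
    # Try to launch the binary with harmless arguments; if it cannot be
    # started at all, report that it is unavailable.
    try:
        subprocess.Popen([exe] + args,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe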
@@ -185,7 +185,7 @@ class RtmpFD(FileDownloader):
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == RD_FAILED:
                 break
             # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
             if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
                 self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                 retval = RD_SUCCESS
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
 from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
+from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftonbladet import AftonbladetIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
 from .aol import AolIE
@@ -22,6 +25,7 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVEmbedIE,
 )
+from .atresplayer import AtresPlayerIE
 from .audiomack import AudiomackIE
 from .auengine import AUEngineIE
 from .azubu import AzubuIE
@@ -62,10 +66,12 @@ from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
+    CNNArticleIE,
 )
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
@@ -87,12 +93,14 @@ from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .drtuber import DrTuberIE
 from .drtv import DRTVIE
+from .dvtv import DVTVIE
 from .dump import DumpIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
@@ -105,6 +113,7 @@ from .elpais import ElPaisIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -163,8 +172,10 @@ from .grooveshark import GroovesharkIE
 from .groupon import GrouponIE
 from .hark import HarkIE
 from .heise import HeiseIE
+from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
@@ -262,6 +273,7 @@ from .nbc import (
 )
 from .ndr import NDRIE
 from .ndtv import NDTVIE
+from .nerdcubed import NerdCubedFeedIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
 from .nfb import NFBIE
@@ -288,6 +300,7 @@ from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oktoberfesttv import OktoberfestTVIE
 from .ooyala import OoyalaIE
+from .openfilm import OpenFilmIE
 from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
@@ -341,6 +354,7 @@ from .savefrom import SaveFromIE
 from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
 from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
@@ -401,6 +415,7 @@ from .ted import TEDIE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
 from .telemb import TeleMBIE
+from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
@@ -459,6 +474,7 @@ from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -506,10 +522,12 @@ from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .xtube import XTubeUserIE, XTubeIE
+from .xxxymovies import XXXYMoviesIE
 from .yahoo import (
     YahooIE,
     YahooSearchIE,
 )
+from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import YoukuIE
@@ -528,6 +546,7 @@ from .youtube import (
     YoutubeShowIE,
     YoutubeSubscriptionsIE,
     YoutubeTopListIE,
+    YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
     YoutubeUserIE,
     YoutubeWatchLaterIE,
70  youtube_dl/extractor/adobetv.py  Normal file
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+    str_to_int,
+)
+
+
+class AdobeTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
+        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
+        'info_dict': {
+            'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
+            'ext': 'mp4',
+            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
+            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'upload_date': '20110914',
+            'duration': 60,
+            'view_count': int,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        player = self._parse_json(
+            self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
+            video_id)
+
+        title = player.get('title') or self._search_regex(
+            r'data-title="([^"]+)"', webpage, 'title')
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        upload_date = unified_strdate(
+            self._html_search_meta('datepublished', webpage, 'upload date'))
+
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration')
+            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+
+        view_count = str_to_int(self._search_regex(
+            r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
+            webpage, 'view count'))
+
+        formats = [{
+            'url': source['src'],
+            'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
+            'tbr': source.get('bitrate'),
+        } for source in player['sources']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
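For reviewers wanting to exercise the new extractor directly, something along these lines should work through the public embedding API; the URL is the test URL from the file above, and the options shown are illustrative assumptions rather than part of this change:

# sketch: running the new AdobeTV extractor without downloading the media
from youtube_dl import YoutubeDL

with YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info(
        'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/'
        'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        download=False)
    print(info['title'], info['duration'])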
35  youtube_dl/extractor/aljazeera.py  Normal file
@@ -0,0 +1,35 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class AlJazeeraIE(InfoExtractor):
+    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+
+    _TEST = {
+        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+        'info_dict': {
+            'id': '3792260579001',
+            'ext': 'mp4',
+            'title': 'The Slum - Episode 1: Deliverance',
+            'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
+            'uploader': 'Al Jazeera English',
+        },
+        'add_ie': ['Brightcove'],
+    }
+
+    def _real_extract(self, url):
+        program_name = self._match_id(url)
+        webpage = self._download_webpage(url, program_name)
+        brightcove_id = self._search_regex(
+            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
+
+        return {
+            '_type': 'url',
+            'url': (
+                'brightcove:'
+                'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
+                '&%40videoPlayer={0}'.format(brightcove_id)
+            ),
+            'ie_key': 'Brightcove',
+        }
@@ -12,7 +12,7 @@ from ..utils import (


 class AllocineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'

     _TESTS = [{
         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
@@ -44,6 +44,9 @@ class AllocineIE(InfoExtractor):
             'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
             'thumbnail': 're:http://.*\.jpg',
         },
+    }, {
+        'url': 'http://www.allocine.fr/video/video-19550147/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
77  youtube_dl/extractor/alphaporno.py  Normal file
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    parse_duration,
+    parse_filesize,
+    int_or_none,
+)
+
+
+class AlphaPornoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
+        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
+        'info_dict': {
+            'id': '258807',
+            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
+            'ext': 'mp4',
+            'title': 'Sensual striptease porn with Samantha Alexandra',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1418694611,
+            'upload_date': '20141216',
+            'duration': 387,
+            'filesize_approx': 54120000,
+            'tbr': 1145,
+            'categories': list,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
+
+        video_url = self._search_regex(
+            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
+        ext = self._html_search_meta(
+            'encodingFormat', webpage, 'ext', default='.mp4')[1:]
+
+        title = self._search_regex(
+            [r'<meta content="([^"]+)" itemprop="description">',
+             r'class="title" itemprop="name">([^<]+)<'],
+            webpage, 'title')
+        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date'))
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+        filesize_approx = parse_filesize(self._html_search_meta(
+            'contentSize', webpage, 'file size'))
+        bitrate = int_or_none(self._html_search_meta(
+            'bitrate', webpage, 'bitrate'))
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', default='').split(',')
+
+        age_limit = self._rta_search(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': ext,
+            'title': title,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'tbr': bitrate,
+            'categories': categories,
+            'age_limit': age_limit,
+        }
@@ -1,42 +1,48 @@
 from __future__ import unicode_literals

-import json
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
+from ..utils import unified_strdate


 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
-    _TEST = {
-        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
-        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+    _TESTS = [{
+        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
         'info_dict': {
-            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
-            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
-            "upload_date": "19681210",
-            "uploader": "SRI International"
+            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+            'ext': 'ogv',
+            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+            'description': 'md5:1780b464abaca9991d8968c877bb53ed',
+            'upload_date': '19681210',
+            'uploader': 'SRI International'
         }
-    }
+    }, {
+        'url': 'https://archive.org/details/Cops1922',
+        'md5': '18f2a19e6d89af8425671da1cf3d4e04',
+        'info_dict': {
+            'id': 'Cops1922',
+            'ext': 'ogv',
+            'title': 'Buster Keaton\'s "Cops" (1922)',
+            'description': 'md5:70f72ee70882f713d4578725461ffcc3',
+        }
+    }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         json_url = url + ('?' if '?' in url else '&') + 'output=json'
-        json_data = self._download_webpage(json_url, video_id)
-        data = json.loads(json_data)
+        data = self._download_json(json_url, video_id)

-        title = data['metadata']['title'][0]
-        description = data['metadata']['description'][0]
-        uploader = data['metadata']['creator'][0]
-        upload_date = unified_strdate(data['metadata']['date'][0])
+        def get_optional(data_dict, field):
+            return data_dict['metadata'].get(field, [None])[0]
+
+        title = get_optional(data, 'title')
+        description = get_optional(data, 'description')
+        uploader = get_optional(data, 'creator')
+        upload_date = unified_strdate(get_optional(data, 'date'))

         formats = [
             {
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
             config_xml_url, video_id, note='Downloading configuration')

         formats = [{
-            'forma_id': q.attrib['quality'],
+            'format_id': q.attrib['quality'],
             # The playpath starts at 'mp4:', if we don't manually
             # split the url, rtmpdump will incorrectly parse them
             'url': q.text.split('mp4:', 1)[0],
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
             'width': int_or_none(f.get('width')),
             'height': int_or_none(f.get('height')),
             'tbr': int_or_none(f.get('bitrate')),
-            'quality': qfunc(f['quality']),
+            'quality': qfunc(f.get('quality')),
             'source_preference': source_pref,
         }
114  youtube_dl/extractor/atresplayer.py  Normal file
@@ -0,0 +1,114 @@
+from __future__ import unicode_literals
+
+import time
+import hmac
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    compat_urllib_request,
+    int_or_none,
+    float_or_none,
+    xpath_text,
+    ExtractorError,
+)
+
+
+class AtresPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
+            'md5': 'efd56753cda1bb64df52a3074f62e38a',
+            'info_dict': {
+                'id': 'capitulo-10-especial-solidario-nochebuena',
+                'ext': 'mp4',
+                'title': 'Especial Solidario de Nochebuena',
+                'description': 'md5:e2d52ff12214fa937107d21064075bf1',
+                'duration': 5527.6,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
+            'only_matching': True,
+        },
+    ]
+
+    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
+    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
+    _TIMESTAMP_SHIFT = 30000
+
+    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
+    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
+    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
+    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        episode_id = self._search_regex(
+            r'episode="([^"]+)"', webpage, 'episode id')
+
+        timestamp = int_or_none(self._download_webpage(
+            self._TIME_API_URL,
+            video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
+        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
+        token = hmac.new(
+            self._MAGIC.encode('ascii'),
+            (episode_id + timestamp_shifted).encode('utf-8')
+        ).hexdigest()
+
+        formats = []
+        for fmt in ['windows', 'android_tablet']:
+            request = compat_urllib_request.Request(
+                self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
+            request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+
+            fmt_json = self._download_json(
+                request, video_id, 'Downloading %s video JSON' % fmt)
+
+            result = fmt_json.get('resultDes')
+            if result.lower() != 'ok':
+                raise ExtractorError(
+                    '%s returned error: %s' % (self.IE_NAME, result), expected=True)
+
+            for _, video_url in fmt_json['resultObject'].items():
+                if video_url.endswith('/Manifest'):
+                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': 'android',
+                        'preference': 1,
+                    })
+        self._sort_formats(formats)
+
+        player = self._download_json(
+            self._PLAYER_URL_TEMPLATE % episode_id,
+            episode_id)
+
+        path_data = player.get('pathData')
+
+        episode = self._download_xml(
+            self._EPISODE_URL_TEMPLATE % path_data,
+            video_id, 'Downloading episode XML')
+
+        duration = float_or_none(xpath_text(
+            episode, './media/asset/info/technical/contentDuration', 'duration'))
+
+        art = episode.find('./media/asset/info/art')
+        title = xpath_text(art, './name', 'title')
+        description = xpath_text(art, './description', 'description')
+        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
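The request signing in the extractor above is self-contained enough to reproduce outside it. In the sketch below only the formula mirrors the code; the episode id and server time are made-up placeholders, and digestmod is passed explicitly because the extractor's call relies on hmac's historical MD5 default, which modern Python no longer provides:

# sketch: how the extractor derives the token for the signed video-JSON URL
import hashlib
import hmac

_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
_TIMESTAMP_SHIFT = 30000

episode_id = 'example-episode-pk'   # placeholder; normally scraped from the page
server_time = 1419868800000         # placeholder; normally fetched from time.json
timestamp_shifted = str(server_time + _TIMESTAMP_SHIFT)

token = hmac.new(
    _MAGIC.encode('ascii'),
    (episode_id + timestamp_shifted).encode('utf-8'),
    hashlib.md5).hexdigest()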
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(SubtitlesInfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'

     _TESTS = [
         {
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             'info_dict': {
                 'id': 'b039d07m',
                 'ext': 'flv',
-                'title': 'Kaleidoscope: Leonard Cohen',
-                'description': 'md5:db4755d7a665ae72343779f7dacb402c',
+                'title': 'Kaleidoscope, Leonard Cohen',
+                'description': 'The Canadian poet and songwriter reflects on his musical career.',
                 'duration': 1740,
             },
             'params': {
@@ -71,7 +71,54 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 'skip_download': True,
             },
             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        },
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+            'info_dict': {
+                'id': 'b04v209v',
+                'ext': 'flv',
+                'title': 'Pete Tong, The Essential New Tune Special',
+                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+                'duration': 10800,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
+            'note': 'Audio',
+            'info_dict': {
+                'id': 'p02frcch',
+                'ext': 'flv',
+                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
+                'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
+                'duration': 3507,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+            'note': 'Video',
+            'info_dict': {
+                'id': 'p025c103',
+                'ext': 'flv',
+                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+                'duration': 226,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+            'only_matching': True,
+        }
     ]

     def _extract_asx_playlist(self, connection, programme_id):
@@ -203,6 +250,59 @@ class BBCCoUkIE(SubtitlesInfoExtractor):

         return formats, subtitles

+    def _download_playlist(self, playlist_id):
+        try:
+            playlist = self._download_json(
+                'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+                playlist_id, 'Downloading playlist JSON')
+
+            version = playlist.get('defaultAvailableVersion')
+            if version:
+                smp_config = version['smpConfig']
+                title = smp_config['title']
+                description = smp_config['summary']
+                for item in smp_config['items']:
+                    kind = item['kind']
+                    if kind != 'programme' and kind != 'radioProgramme':
+                        continue
+                    programme_id = item.get('vpid')
+                    duration = int(item.get('duration'))
+                    formats, subtitles = self._download_media_selector(programme_id)
+                return programme_id, title, description, duration, formats, subtitles
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                raise
+
+        # fallback to legacy playlist
+        playlist = self._download_xml(
+            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+            playlist_id, 'Downloading legacy playlist XML')
+
+        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+        if no_items is not None:
+            reason = no_items.get('reason')
+            if reason == 'preAvailability':
+                msg = 'Episode %s is not yet available' % playlist_id
+            elif reason == 'postAvailability':
+                msg = 'Episode %s is no longer available' % playlist_id
+            elif reason == 'noMedia':
+                msg = 'Episode %s is not currently available' % playlist_id
+            else:
+                msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+            raise ExtractorError(msg, expected=True)
+
+        for item in self._extract_items(playlist):
+            kind = item.get('kind')
+            if kind != 'programme' and kind != 'radioProgramme':
+                continue
+            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+            programme_id = item.get('identifier')
+            duration = int(item.get('duration'))
+            formats, subtitles = self._download_media_selector(programme_id)
+
+        return programme_id, title, description, duration, formats, subtitles
+
     def _real_extract(self, url):
         group_id = self._match_id(url)

@@ -219,32 +319,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             duration = player['duration']
             formats, subtitles = self._download_media_selector(programme_id)
         else:
-            playlist = self._download_xml(
-                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
-                group_id, 'Downloading playlist XML')
-
-            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-            if no_items is not None:
-                reason = no_items.get('reason')
-                if reason == 'preAvailability':
-                    msg = 'Episode %s is not yet available' % group_id
-                elif reason == 'postAvailability':
-                    msg = 'Episode %s is no longer available' % group_id
-                elif reason == 'noMedia':
-                    msg = 'Episode %s is not currently available' % group_id
-                else:
-                    msg = 'Episode %s is not available: %s' % (group_id, reason)
-                raise ExtractorError(msg, expected=True)
-
-            for item in self._extract_items(playlist):
-                kind = item.get('kind')
-                if kind != 'programme' and kind != 'radioProgramme':
-                    continue
-                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
-                programme_id = item.get('identifier')
-                duration = int(item.get('duration'))
-                formats, subtitles = self._download_media_selector(programme_id)
+            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)

         if self._downloader.params.get('listsubtitles', False):
             self._list_available_subtitles(programme_id, subtitles)
@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
             'info_dict': {
-                'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+                'id': '406429c6-1b8a-463e-83fc-814adb81a9db',
                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
                 'ext': 'flv',
                 'title': 'BET News Presents: A Conversation With President Obama',
@@ -25,7 +25,7 @@ from ..utils import (


 class BrightcoveIE(InfoExtractor):
-    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)'
+    _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'

     _TESTS = [
@@ -5,6 +5,8 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    HEADRequest,
     unified_strdate,
     url_basename,
     qualities,
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):

         preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])

+        fmt_url = next(iter(media.find('VIDEOS'))).text
+        if '/geo' in fmt_url.lower():
+            response = self._request_webpage(
+                HEADRequest(fmt_url), video_id,
+                'Checking if the video is georestricted')
+            if '/blocage' in response.geturl():
+                raise ExtractorError(
+                    'The video is not available in your country',
+                    expected=True)
+
         formats = []
         for fmt in media.find('VIDEOS'):
             format_url = fmt.text
@@ -11,6 +11,7 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
 )


@@ -19,41 +20,33 @@ class CeskaTelevizeIE(InfoExtractor):

     _TESTS = [
         {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
+            'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
             'info_dict': {
-                'id': '213512120230004',
-                'ext': 'flv',
-                'title': 'První republika: Španělská chřipka',
-                'duration': 3107.4,
+                'id': '214411058091220',
+                'ext': 'mp4',
+                'title': 'Hyde Park Civilizace',
+                'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 3350,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
-            'skip': 'Works only from Czech Republic.',
-        },
-        {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
-            'info_dict': {
-                'id': '20138143440',
-                'ext': 'flv',
-                'title': 'Tsatsiki, maminka a policajt',
-                'duration': 6754.1,
-            },
-            'params': {
-                'skip_download': True,  # requires rtmpdump
-            },
-            'skip': 'Works only from Czech Republic.',
         },
         {
             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
             'info_dict': {
                 'id': '14716',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'První republika: Zpěvačka z Dupárny Bobina',
-                'duration': 90,
+                'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 88.4,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
         },
     ]
@@ -80,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
             'requestSource': 'iVysilani',
         }

-        req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
-                                            data=compat_urllib_parse.urlencode(data))
+        req = compat_urllib_request.Request(
+            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+            data=compat_urllib_parse.urlencode(data))

         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         req.add_header('x-addr', '127.0.0.1')
@@ -90,39 +84,31 @@ class CeskaTelevizeIE(InfoExtractor):

         playlistpage = self._download_json(req, video_id)

-        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
+        playlist_url = playlistpage['url']
+        if playlist_url == 'error_region':
+            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
         req.add_header('Referer', url)

-        playlist = self._download_xml(req, video_id)
+        playlist = self._download_json(req, video_id)

+        item = playlist['playlist'][0]
         formats = []
-        for i in playlist.find('smilRoot/body'):
-            if 'AD' not in i.attrib['id']:
-                base_url = i.attrib['base']
-                parsedurl = compat_urllib_parse_urlparse(base_url)
-                duration = i.attrib['duration']
-
-                for video in i.findall('video'):
-                    if video.attrib['label'] != 'AD':
-                        format_id = video.attrib['label']
-                        play_path = video.attrib['src']
-                        vbr = int(video.attrib['system-bitrate'])
-
-                        formats.append({
-                            'format_id': format_id,
-                            'url': base_url,
-                            'vbr': vbr,
-                            'play_path': play_path,
-                            'app': parsedurl.path[1:] + '?' + parsedurl.query,
-                            'rtmp_live': True,
-                            'ext': 'flv',
-                        })
-
+        for format_id, stream_url in item['streamUrls'].items():
+            formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
         self._sort_formats(formats)

+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        duration = float_or_none(item.get('duration'))
+        thumbnail = item.get('previewImageUrl')
+
         return {
             'id': episode_id,
-            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
-            'duration': float(duration),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
         }
@@ -11,14 +11,14 @@ from ..utils import (


 class CNNIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
+    _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''

     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
         'md5': '3e6121ea48df7e2259fe73a0628605c4',
         'info_dict': {
-            'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
+            'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
             'ext': 'mp4',
             'title': 'Nadal wins 8th French Open title',
             'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
@@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
            "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
            "upload_date": "20130821",
        }
+    }, {
+        'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
+        'md5': 'f14d02ebd264df951feb2400e2c25a1b',
+        'info_dict': {
+            'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
+            'ext': 'mp4',
+            'title': 'Nashville Ep. 1: Hand crafted skateboards',
+            'description': 'md5:e7223a503315c9f150acac52e76de086',
+            'upload_date': '20141222',
+        }
     }]

     def _real_extract(self, url):
@@ -127,3 +137,28 @@ class CNNBlogsIE(InfoExtractor):
             'url': cnn_url,
             'ie_key': CNNIE.ie_key(),
         }
+
+
+class CNNArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
+    _TEST = {
+        'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
+        'md5': '275b326f85d80dff7592a9820f5dc887',
+        'info_dict': {
+            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
+            'ext': 'mp4',
+            'title': 'Obama: We\'re not going to be intimidated',
+            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
+            'upload_date': '20141220',
+        },
+        'add_ie': ['CNN'],
+    }
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage(url, url_basename(url))
+        cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
+        return {
+            '_type': 'url',
+            'url': 'http://cnn.com/video/?/video/' + cnn_url,
+            'ie_key': CNNIE.ie_key(),
+        }
@@ -50,7 +50,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
                           )|
                           (?P<interview>
                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     (?:[?#].*|$)'''
+                     '''
    _TESTS = [{
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@@ -83,6 +83,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
    }, {
        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
+        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
        'only_matching': True,
@@ -40,7 +40,7 @@ class InfoExtractor(object):
    information about the video (or videos) the URL refers to. This
    information includes the real video URL, the video title, author and
    others. The information is stored in a dictionary which is then
-   passed to the FileDownloader. The FileDownloader processes this
+   passed to the YoutubeDL. The YoutubeDL processes this
    information possibly downloading the video to the file system, among
    other possible outcomes.

@@ -92,6 +92,8 @@ class InfoExtractor(object):
                                 by this field, regardless of all other values.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
+                                < -1000 to hide the format (if there is
+                                   another one which is strictly better)
                    * language_preference  Is this in the correct requested
                                 language?
                                 10 if it's what the URL is about,
@@ -589,7 +591,7 @@ class InfoExtractor(object):
        if display_name is None:
            display_name = name
        return self._html_search_regex(
-           r'''(?ix)<meta
+           r'''(?isx)<meta
                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
            html, display_name, fatal=fatal, group='content', **kwargs)
29  youtube_dl/extractor/commonmistakes.py  Normal file
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'''(?x)
+        (?:url|URL)
+    '''
+
+    _TESTS = [{
+        'url': 'url',
+        'only_matching': True,
+    }, {
+        'url': 'URL',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        msg = (
+            'You\'ve asked youtube-dl to download the URL "%s". '
+            'That doesn\'t make any sense. '
+            'Simply remove the parameter in your command or configuration.'
+        ) % url
+        if self._downloader.params.get('verbose'):
+            msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
+        raise ExtractorError(msg, expected=True)
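In practice this extractor just turns a common command-line slip (passing the literal word URL instead of an actual address) into a readable error rather than a cryptic "not a valid URL" failure. A rough illustration of the resulting behaviour, under the assumption that the error surfaces as a DownloadError when errors are not ignored:

# sketch: what happens when the literal word "URL" is passed as an argument
from youtube_dl import YoutubeDL
from youtube_dl.utils import DownloadError

try:
    with YoutubeDL() as ydl:
        ydl.download(['URL'])
except DownloadError as err:
    print(err)  # explains that "URL" should be replaced by a real URL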
@@ -29,10 +29,9 @@ from .common import InfoExtractor


 class CrunchyrollIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+    _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
-        #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
         'info_dict': {
             'id': '645513',
             'ext': 'flv',
@@ -47,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
+        'only_matching': True,
+    }]

     _FORMAT_IDS = {
         '360': ('60', '106'),
@@ -226,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

         formats = []
-        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
             streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
@@ -27,7 +27,6 @@ class CSpanIE(InfoExtractor):
         'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
         # For whatever reason, the served video alternates between
         # two different ones
-        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
         'info_dict': {
             'id': '340723',
             'ext': 'mp4',
@@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
         full_id = self._search_regex(
-            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
+            r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
             webpage, 'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
         info = self._download_xml(
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     float_or_none,
     int_or_none,
@@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor):
         self._sort_formats(formats)

         return {
-            'id': video['id'],
+            'id': compat_str(video['id']),
             'display_id': display_id,
             'title': video['title'],
             'description': clean_html(video['desc']),
youtube_dl/extractor/dvtv.py  (new file, 125 lines)
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    unescapeHTML,
+    ExtractorError,
+)
+
+
+class DVTVIE(InfoExtractor):
+    IE_NAME = 'dvtv'
+    IE_DESC = 'http://video.aktualne.cz/'
+
+    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+
+    _TESTS = [{
+        'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
+        'md5': '67cb83e4a955d36e1b5d31993134a0c2',
+        'info_dict': {
+            'id': 'dc0768de855511e49e4b0025900fea04',
+            'ext': 'mp4',
+            'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
+        }
+    }, {
+        'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
+        'md5': '6388f1941b48537dbd28791f712af8bf',
+        'info_dict': {
+            'id': '72c02230849211e49f60002590604f2e',
+            'ext': 'mp4',
+            'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
+        }
+    }, {
+        'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
+        'info_dict': {
+            'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+            'id': '973eb3bc854e11e498be002590604f2e',
+        },
+        'playlist': [{
+            'md5': 'da7ca6be4935532241fa9520b3ad91e4',
+            'info_dict': {
+                'id': 'b0b40906854d11e4bdad0025900fea04',
+                'ext': 'mp4',
+                'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
+            }
+        }, {
+            'md5': '5f7652a08b05009c1292317b449ffea2',
+            'info_dict': {
+                'id': '420ad9ec854a11e4bdad0025900fea04',
+                'ext': 'mp4',
+                'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
+            }
+        }, {
+            'md5': '498eb9dfa97169f409126c617e2a3d64',
+            'info_dict': {
+                'id': '95d35580846a11e4b6d20025900fea04',
+                'ext': 'mp4',
+                'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
+            }
+        }, {
+            'md5': 'b8dc6b744844032dab6ba3781a7274b9',
+            'info_dict': {
+                'id': '6fe14d66853511e4833a0025900fea04',
+                'ext': 'mp4',
+                'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
+            }
+        }],
+    }, {
+        'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
+        'only_matching': True,
+    }]
+
+    def _parse_video_metadata(self, js, video_id):
+        metadata = self._parse_json(js, video_id, transform_source=js_to_json)
+
+        formats = []
+        for video in metadata['sources']:
+            ext = video['type'][6:]
+            formats.append({
+                'url': video['file'],
+                'ext': ext,
+                'format_id': '%s-%s' % (ext, video['label']),
+                'height': int(video['label'].rstrip('p')),
+                'fps': 25,
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': metadata['mediaid'],
+            'title': unescapeHTML(metadata['title']),
+            'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
+            'formats': formats
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # single video
+        item = self._search_regex(
+            r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
+            webpage, 'video', default=None, fatal=False)
+
+        if item:
+            return self._parse_video_metadata(item, video_id)
+
+        # playlist
+        items = re.findall(
+            r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
+            webpage)
+
+        if items:
+            return {
+                '_type': 'playlist',
+                'id': video_id,
+                'title': self._og_search_title(webpage),
+                'entries': [self._parse_video_metadata(i, video_id) for i in items]
+            }
+
+        raise ExtractorError('Could not find neither video nor playlist')
youtube_dl/extractor/echomsk.py  (new file, 46 lines)
@@ -0,0 +1,46 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class EchoMskIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
+        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
+        'info_dict': {
+            'id': '1464134',
+            'ext': 'mp3',
+            'title': 'Особое мнение - 29 декабря 2014, 19:08',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        audio_url = self._search_regex(
+            r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
+
+        title = self._html_search_regex(
+            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
+            webpage, 'title')
+
+        air_date = self._html_search_regex(
+            r'(?s)<div class="date">(.+?)</div>',
+            webpage, 'date', fatal=False, default=None)
+
+        if air_date:
+            air_date = re.sub(r'(\s)\1+', r'\1', air_date)
+            if air_date:
+                title = '%s - %s' % (title, air_date)
+
+        return {
+            'id': video_id,
+            'url': audio_url,
+            'title': title,
+        }
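A small note on the `re.sub(r'(\s)\1+', r'\1', air_date)` call in the new extractor above: it collapses runs of the same whitespace character in the scraped date before appending it to the title. A quick illustration on a made-up string:

```python
import re

# Made-up scraped date with stray double spaces, as EchoMskIE might see it.
air_date = '29  декабря   2014,  19:08'
print(re.sub(r'(\s)\1+', r'\1', air_date))  # '29 декабря 2014, 19:08'
```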
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
 import json

 from .common import InfoExtractor
@@ -12,32 +11,49 @@ from ..utils import (


 class EllenTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
         'md5': 'e4af06f3bf0d5f471921a18db5764642',
         'info_dict': {
             'id': '0-7jqrsr18',
             'ext': 'mp4',
             'title': 'What\'s Wrong with These Photos? A Whole Lot',
+            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
             'timestamp': 1406876400,
             'upload_date': '20140801',
         }
-    }
+    }, {
+        'url': 'http://ellentube.com/videos/0-dvzmabd5/',
+        'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+        'info_dict': {
+            'id': '0-dvzmabd5',
+            'ext': 'mp4',
+            'title': '1 year old twin sister makes her brother laugh',
+            'description': '1 year old twin sister makes her brother laugh',
+            'timestamp': 1419542075,
+            'upload_date': '20141225',
+        }
+    }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id)

+        video_url = self._html_search_meta('VideoURL', webpage, 'url')
+        title = self._og_search_title(webpage, default=None) or self._search_regex(
+            r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description') or self._og_search_description(webpage)
         timestamp = parse_iso8601(self._search_regex(
             r'<span class="publish-date"><time datetime="([^"]+)">',
             webpage, 'timestamp'))

         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'url': self._html_search_meta('VideoURL', webpage, 'url'),
+            'url': video_url,
+            'title': title,
+            'description': description,
             'timestamp': timestamp,
         }

@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)

         webpage = self._download_webpage(url, playlist_id)
         playlist = self._extract_playlist(webpage)
youtube_dl/extractor/eroprofile.py  (new file, 45 lines)
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EroProfileIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+        'info_dict': {
+            'id': '3733775',
+            'display_id': 'sexy-babe-softcore',
+            'ext': 'm4v',
+            'title': 'sexy babe softcore',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+            webpage, 'video id', default=None)
+
+        video_url = self._search_regex(
+            r'<source src="([^"]+)', webpage, 'video url')
+        title = self._html_search_regex(
+            r'Title:</th><td>([^<]+)</td>', webpage, 'title')
+        thumbnail = self._search_regex(
+            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'age_limit': 18,
+        }
@@ -6,7 +6,9 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     xpath_with_ns,
-    parse_iso8601
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
 )

 NAMESPACE_MAP = {
@@ -21,21 +23,38 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'

 class GameOneIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.gameone.de/tv/288',
-        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
-        'info_dict': {
-            'id': '288',
-            'ext': 'mp4',
-            'title': 'Game One - Folge 288',
-            'duration': 1238,
-            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
-            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
-            'age_limit': 16,
-            'upload_date': '20140513',
-            'timestamp': 1399980122,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.gameone.de/tv/288',
+            'md5': '136656b7fb4c9cb4a8e2d500651c499b',
+            'info_dict': {
+                'id': '288',
+                'ext': 'mp4',
+                'title': 'Game One - Folge 288',
+                'duration': 1238,
+                'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
+                'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
+                'age_limit': 16,
+                'upload_date': '20140513',
+                'timestamp': 1399980122,
+            }
+        },
+        {
+            'url': 'http://gameone.de/tv/220',
+            'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
+            'info_dict': {
+                'id': '220',
+                'ext': 'mp4',
+                'upload_date': '20120918',
+                'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
+                'timestamp': 1347971451,
+                'title': 'Game One - Folge 220',
+                'duration': 896.62,
+                'age_limit': 16,
+            }
+        }
+    ]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -66,13 +85,13 @@ class GameOneIE(InfoExtractor):
             video_id,
             'Downloading media:content')
         rendition_items = content.findall('.//rendition')
-        duration = int(rendition_items[0].get('duration'))
+        duration = float_or_none(rendition_items[0].get('duration'))
         formats = [
             {
                 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
-                'width': int(r.get('width')),
-                'height': int(r.get('height')),
-                'tbr': int(r.get('bitrate')),
+                'width': int_or_none(r.get('width')),
+                'height': int_or_none(r.get('height')),
+                'tbr': int_or_none(r.get('bitrate')),
             }
             for r in rendition_items
         ]
@@ -105,7 +124,8 @@ class GameOnePlaylistIE(InfoExtractor):
         webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
         max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
         entries = [
-            self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
+            self.url_result('http://www.gameone.de/tv/%d' %
+                            video_id, 'GameOne')
             for video_id in range(max_id, 0, -1)]

         return {
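Why the GameOne hunk above swaps plain `int()`/`int()` for `int_or_none()`/`float_or_none()`: the rendition attributes can be missing or non-integer (the second test's duration is 896.62). A sketch using simplified stand-ins for the youtube_dl.utils helpers, run on made-up attribute values:

```python
# Simplified stand-ins for youtube_dl.utils.int_or_none / float_or_none.
def int_or_none(v, default=None):
    return default if v is None else int(v)

def float_or_none(v, default=None):
    return default if v is None else float(v)

rendition = {'width': '640', 'bitrate': None, 'duration': '896.62'}  # made-up attrs

print(int(rendition['width']))               # 640
print(int_or_none(rendition['bitrate']))     # None -- plain int(None) would raise TypeError
print(float_or_none(rendition['duration']))  # 896.62 -- int('896.62') would raise ValueError
```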
@@ -23,6 +23,7 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     unsmuggle_url,
+    UnsupportedError,
     url_basename,
 )
 from .brightcove import BrightcoveIE
@@ -180,6 +181,14 @@ class GenericIE(InfoExtractor):
             'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
         },
     },
+    # BBC iPlayer embeds
+    {
+        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+        'info_dict': {
+            'title': 'BBC - Blogs - Adam Curtis - BUGGER',
+        },
+        'playlist_mincount': 18,
+    },
     # RUTV embed
     {
         'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -698,9 +707,9 @@ class GenericIE(InfoExtractor):
             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

         # Helper method
-        def _playlist_from_matches(matches, getter, ie=None):
+        def _playlist_from_matches(matches, getter=None, ie=None):
             urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m)), ie)
+                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                 for m in matches)
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -904,6 +913,11 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 matches, getter=unescapeHTML, ie='FunnyOrDie')

+        # Look for BBC iPlayer embed
+        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+        if matches:
+            return _playlist_from_matches(matches, ie='BBCCoUk')
+
         # Look for embedded RUTV player
         rutv_url = RUTVIE._extract_url(webpage)
         if rutv_url:
@@ -1057,7 +1071,7 @@ class GenericIE(InfoExtractor):
                 'url': new_url,
             }
         if not found:
-            raise ExtractorError('Unsupported URL: %s' % url)
+            raise UnsupportedError(url)

         entries = []
         for video_url in found:
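The `getter=None` change in `_playlist_from_matches` above exists so that callers which already have plain URL strings (like the new BBC iPlayer branch) can skip the extraction callback. A minimal sketch of the idea, not the real helper:

```python
# Sketch only: mirrors the "getter(m) if getter else m" pattern from the diff.
def urls_from_matches(matches, getter=None):
    return [getter(m) if getter else m for m in matches]

# Plain URL strings pass through unchanged (the BBC iPlayer case).
print(urls_from_matches(['http://example.com/a', 'http://example.com/b']))
# A getter is still supported for matches that need unwrapping.
print(urls_from_matches([('ignored', 'http://example.com/c')], getter=lambda m: m[1]))
```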
youtube_dl/extractor/hellporno.py  (new file, 71 lines)
@@ -0,0 +1,71 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    remove_end,
+)
+
+
+class HellPornoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
+        'md5': '1fee339c610d2049699ef2aa699439f1',
+        'info_dict': {
+            'id': '149116',
+            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
+            'ext': 'mp4',
+            'title': 'Dixie is posing with naked ass very erotic',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = remove_end(self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
+
+        flashvars = self._parse_json(self._search_regex(
+            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
+            display_id, transform_source=js_to_json)
+
+        video_id = flashvars.get('video_id')
+        thumbnail = flashvars.get('preview_url')
+        ext = flashvars.get('postfix', '.mp4')[1:]
+
+        formats = []
+        for video_url_key in ['video_url', 'video_alt_url']:
+            video_url = flashvars.get(video_url_key)
+            if not video_url:
+                continue
+            video_text = flashvars.get('%s_text' % video_url_key)
+            fmt = {
+                'url': video_url,
+                'ext': ext,
+                'format_id': video_text,
+            }
+            m = re.search(r'^(?P<height>\d+)[pP]', video_text)
+            if m:
+                fmt['height'] = int(m.group('height'))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', default='').split(',')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': 18,
+            'formats': formats,
+        }
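The new extractor above leans on the common "scrape the JS `flashvars` object and parse it as JSON" pattern (via `_parse_json` with `js_to_json` as the transform). A very rough stand-alone sketch of the same idea; the sample page content is invented and the naive quote replacement only stands in for what `js_to_json` does more robustly:

```python
import json
import re

# Made-up page fragment in the shape HellPorno serves.
webpage = "var flashvars = {'video_id': '149116', 'postfix': '.mp4'};"

js = re.search(r'var\s+flashvars\s*=\s*({.+?});', webpage).group(1)
flashvars = json.loads(js.replace("'", '"'))  # crude stand-in for js_to_json

print(flashvars['video_id'])      # '149116'
print(flashvars['postfix'][1:])   # 'mp4' -- mirrors ext = flashvars.get('postfix', '.mp4')[1:]
```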
youtube_dl/extractor/hitbox.py  (new file, 166 lines)
@@ -0,0 +1,166 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    parse_iso8601,
+    float_or_none,
+    int_or_none,
+    compat_str,
+)
+
+
+class HitboxIE(InfoExtractor):
+    IE_NAME = 'hitbox'
+    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.hitbox.tv/video/203213',
+        'info_dict': {
+            'id': '203213',
+            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
+            'alt_title': 'hitboxlive - Aug 9th #6',
+            'description': '',
+            'ext': 'mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 215.1666,
+            'resolution': 'HD 720p',
+            'uploader': 'hitboxlive',
+            'view_count': int,
+            'timestamp': 1407576133,
+            'upload_date': '20140809',
+            'categories': ['Live Show'],
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _extract_metadata(self, url, video_id):
+        thumb_base = 'https://edge.sf.hitbox.tv'
+        metadata = self._download_json(
+            '%s/%s' % (url, video_id), video_id)
+
+        date = 'media_live_since'
+        media_type = 'livestream'
+        if metadata.get('media_type') == 'video':
+            media_type = 'video'
+            date = 'media_date_added'
+
+        video_meta = metadata.get(media_type, [])[0]
+        title = video_meta.get('media_status')
+        alt_title = video_meta.get('media_title')
+        description = clean_html(
+            video_meta.get('media_description') or
+            video_meta.get('media_description_md'))
+        duration = float_or_none(video_meta.get('media_duration'))
+        uploader = video_meta.get('media_user_name')
+        views = int_or_none(video_meta.get('media_views'))
+        timestamp = parse_iso8601(video_meta.get(date), ' ')
+        categories = [video_meta.get('category_name')]
+        thumbs = [
+            {'url': thumb_base + video_meta.get('media_thumbnail'),
+             'width': 320,
+             'height': 180},
+            {'url': thumb_base + video_meta.get('media_thumbnail_large'),
+             'width': 768,
+             'height': 432},
+        ]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'alt_title': alt_title,
+            'description': description,
+            'ext': 'mp4',
+            'thumbnails': thumbs,
+            'duration': duration,
+            'uploader': uploader,
+            'view_count': views,
+            'timestamp': timestamp,
+            'categories': categories,
+        }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/video',
+            video_id)
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
+            video_id)
+
+        clip = player_config.get('clip')
+        video_url = clip.get('url')
+        res = clip.get('bitrates', [])[0].get('label')
+
+        metadata['resolution'] = res
+        metadata['url'] = video_url
+        metadata['protocol'] = 'm3u8'
+
+        return metadata
+
+
+class HitboxLiveIE(HitboxIE):
+    IE_NAME = 'hitbox:live'
+    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.hitbox.tv/dimak',
+        'info_dict': {
+            'id': 'dimak',
+            'ext': 'mp4',
+            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
+            'timestamp': int,
+            'upload_date': compat_str,
+            'title': compat_str,
+            'uploader': 'Dimak',
+        },
+        'params': {
+            # live
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        metadata = self._extract_metadata(
+            'https://www.hitbox.tv/api/media/live',
+            video_id)
+
+        player_config = self._download_json(
+            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
+            video_id)
+
+        formats = []
+        cdns = player_config.get('cdns')
+        servers = []
+        for cdn in cdns:
+            base_url = cdn.get('netConnectionUrl')
+            host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
+            if base_url not in servers:
+                servers.append(base_url)
+                for stream in cdn.get('bitrates'):
+                    label = stream.get('label')
+                    if label != 'Auto':
+                        formats.append({
+                            'url': '%s/%s' % (base_url, stream.get('url')),
+                            'ext': 'mp4',
+                            'vbr': stream.get('bitrate'),
+                            'resolution': label,
+                            'rtmp_live': True,
+                            'format_note': host,
+                            'page_url': url,
+                            'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
+                        })
+
+        self._sort_formats(formats)
+        metadata['formats'] = formats
+        metadata['is_live'] = True
+        metadata['title'] = self._live_title(metadata.get('title'))
+        return metadata
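One detail worth calling out in the new Hitbox extractor: `parse_iso8601(video_meta.get(date), ' ')` parses an ISO-like timestamp whose date/time separator is a space rather than `T`. The equivalent arithmetic with only the standard library, assuming the API returns a UTC string of that shape (the exact string below is an assumption, chosen to match the test's timestamp):

```python
import calendar
from datetime import datetime

date_str = '2014-08-09 09:22:13'  # assumed shape of media_date_added
timestamp = calendar.timegm(
    datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S').timetuple())
print(timestamp)  # 1407576133, i.e. the 'timestamp' value in the test above
```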
@@ -22,7 +22,7 @@ class InternetVideoArchiveIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'SKYFALL',
             'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
-            'duration': 149,
+            'duration': 152,
         },
     }

@@ -10,13 +10,14 @@ from ..utils import int_or_none
 class KontrTubeIE(InfoExtractor):
     IE_NAME = 'kontrtube'
     IE_DESC = 'KontrTube.ru - Труба зовёт'
-    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
+    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'

     _TEST = {
         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
         'md5': '975a991a4926c9a85f383a736a2e6b80',
         'info_dict': {
             'id': '2678',
+            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
             'ext': 'mp4',
             'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
             'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        display_id = mobj.group('display_id')

-        webpage = self._download_webpage(url, video_id, 'Downloading page')
+        webpage = self._download_webpage(
+            url, display_id, 'Downloading page')

-        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
-        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        video_url = self._html_search_regex(
+            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
         title = self._html_search_regex(
             r'<title>(.+?)</title>', webpage, 'video title')
-        description = self._html_search_meta('description', webpage, 'video description')
+        description = self._html_search_meta(
+            'description', webpage, 'video description')

         mobj = re.search(
-            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
+            webpage)
         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

         view_count = self._html_search_regex(
-            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+            webpage, 'view count', fatal=False)

         comment_count = None
         comment_str = self._html_search_regex(
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):

         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
             'thumbnail': thumbnail,
             'title': title,
@@ -17,7 +17,7 @@ from ..utils import (

 class TechTVMITIE(InfoExtractor):
     IE_NAME = 'techtv.mit.edu'
-    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+    _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'

     _TEST = {
         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
@@ -31,8 +31,7 @@ class TechTVMITIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
         clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
@@ -106,7 +105,7 @@ class OCWMITIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
-                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
+                # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
         {
@@ -116,7 +115,7 @@ class OCWMITIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Session 1: Introduction to Derivatives',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
-                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
+                # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
         }
     ]
@@ -52,7 +52,8 @@ class MoeVideoIE(InfoExtractor):
                 'height': 296,
                 'duration': 6027,
                 'filesize': 588257923,
-            }
+            },
+            'skip': 'Video has been removed',
         },
     ]

@@ -1,7 +1,6 @@
 from __future__ import unicode_literals

 import re
-import time

 from .common import InfoExtractor
 from ..compat import (
@@ -64,8 +63,7 @@ class MooshareIE(InfoExtractor):
             'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')

-        self.to_screen('%s: Waiting for timeout' % video_id)
-        time.sleep(5)
+        self._sleep(5, video_id)

         video_page = self._download_webpage(request, video_id, 'Downloading video page')
youtube_dl/extractor/nerdcubed.py  (new file, 35 lines)
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+
+from .common import InfoExtractor
+
+
+class NerdCubedFeedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
+    _TEST = {
+        'url': 'http://www.nerdcubed.co.uk/feed.json',
+        'info_dict': {
+            'title': 'nerdcubed.co.uk feed',
+        },
+        'playlist_mincount': 1300,
+    }
+
+    def _real_extract(self, url):
+        feed = self._download_json(url, url, "Downloading NerdCubed JSON feed")
+
+        entries = [{
+            '_type': 'url',
+            'title': feed_entry['title'],
+            'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
+            'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
+            'url': "http://www.youtube.com/watch?v=" + feed_entry['youtube_id'],
+        } for feed_entry in feed]
+
+        return {
+            '_type': 'playlist',
+            'title': 'nerdcubed.co.uk feed',
+            'id': 'nerdcubed-feed',
+            'entries': entries,
+        }
@@ -54,7 +54,7 @@ class NHLBaseInfoExtractor(InfoExtractor):

 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'

     _TESTS = [{
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -9,6 +9,7 @@ from ..utils import (
     qualities,
     strip_jsonp,
     url_basename,
+    fix_xml_ampersands,
 )


@@ -51,7 +52,21 @@ class NPOIE(InfoExtractor):
             'upload_date': '20130225',
             'duration': 3000,
         },
-    }
+    },
+    {
+        'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+        'info_dict': {
+            'id': 'WO_VPRO_043706',
+            'ext': 'wmv',
+            'title': 'De nieuwe mens - Deel 1',
+            'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+            'duration': 4680,
+        },
+        'params': {
+            # mplayer mms download
+            'skip_download': True,
+        }
+    },
     ]

     def _real_extract(self, url):
@@ -74,31 +89,58 @@ class NPOIE(InfoExtractor):
         token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')

         formats = []
-        quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
-        for format_id in metadata['pubopties']:
-            format_info = self._download_json(
-                'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
-                video_id, 'Downloading %s JSON' % format_id)
-            if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
-                continue
-            streams = format_info.get('streams')
-            if streams:
-                video_info = self._download_json(
-                    streams[0] + '&type=json',
-                    video_id, 'Downloading %s stream JSON' % format_id)
-            else:
-                video_info = format_info
-            video_url = video_info.get('url')
-            if not video_url:
-                continue
-            if format_id == 'adaptive':
-                formats.extend(self._extract_m3u8_formats(video_url, video_id))
-            else:
-                formats.append({
-                    'url': video_url,
-                    'format_id': format_id,
-                    'quality': quality(format_id),
-                })
+
+        pubopties = metadata.get('pubopties')
+        if pubopties:
+            quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+            for format_id in pubopties:
+                format_info = self._download_json(
+                    'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
+                    % (video_id, format_id, token),
+                    video_id, 'Downloading %s JSON' % format_id)
+                if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
+                    continue
+                streams = format_info.get('streams')
+                if streams:
+                    video_info = self._download_json(
+                        streams[0] + '&type=json',
+                        video_id, 'Downloading %s stream JSON' % format_id)
+                else:
+                    video_info = format_info
+                video_url = video_info.get('url')
+                if not video_url:
+                    continue
+                if format_id == 'adaptive':
+                    formats.extend(self._extract_m3u8_formats(video_url, video_id))
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'quality': quality(format_id),
+                    })
+
+        streams = metadata.get('streams')
+        if streams:
+            for i, stream in enumerate(streams):
+                stream_url = stream.get('url')
+                if not stream_url:
+                    continue
+                asx = self._download_xml(
+                    stream_url, video_id,
+                    'Downloading stream %d ASX playlist' % i,
+                    transform_source=fix_xml_ampersands)
+                ref = asx.find('./ENTRY/Ref')
+                if ref is None:
+                    continue
+                video_url = ref.get('href')
+                if not video_url:
+                    continue
+                formats.append({
+                    'url': video_url,
+                    'ext': stream.get('formaat', 'asf'),
+                    'quality': stream.get('kwaliteit'),
+                })

         self._sort_formats(formats)

         return {
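The new `streams` branch in the NPO hunk above boils down to: fetch each ASX playlist as XML and pull the stream URL out of `ENTRY/Ref`. A small stand-alone sketch of that lookup with the standard library; the sample ASX document is invented, real ones come from the stream URLs in the metadata:

```python
import xml.etree.ElementTree as ET

asx_doc = '''<ASX version="3.0">
  <ENTRY>
    <Ref href="mms://example.invalid/WO_VPRO_043706.wmv"/>
  </ENTRY>
</ASX>'''

asx = ET.fromstring(asx_doc)
ref = asx.find('./ENTRY/Ref')          # same path the extractor uses
print(ref.get('href') if ref is not None else None)
```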
@@ -77,7 +77,7 @@ class NRKTVIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
-            'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
+            'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
             'info_dict': {
                 'id': 'MUHH48000314',
                 'ext': 'flv',
@@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
         },
         {
             'url': 'http://tv.nrk.no/program/mdfp15000514',
-            'md5': 'af01795a31f1cf7265c8657534d8077b',
+            'md5': '383650ece2b25ecec996ad7b5bb2a384',
             'info_dict': {
                 'id': 'mdfp15000514',
                 'ext': 'flv',
youtube_dl/extractor/openfilm.py  (new file, 70 lines)
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    compat_urllib_parse,
+    parse_age_limit,
+    int_or_none,
+)
+
+
+class OpenFilmIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P<id>.+)'
+    _TEST = {
+        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
+        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
+        'info_dict': {
+            'id': '32736',
+            'display_id': 'human-resources-remastered',
+            'ext': 'mp4',
+            'title': 'Human Resources (Remastered)',
+            'description': 'Social Engineering in the 20th Century.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 7164,
+            'timestamp': 1334756988,
+            'upload_date': '20120418',
+            'uploader_id': '41117',
+            'view_count': int,
+            'age_limit': 0,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        player = compat_urllib_parse.unquote_plus(
+            self._og_search_video_url(webpage))
+
+        video = json.loads(self._search_regex(
+            r'\bp=({.+?})(?:&|$)', player, 'video JSON'))
+
+        video_url = '%s1.mp4' % video['location']
+        video_id = video.get('video_id')
+        display_id = video.get('alias') or display_id
+        title = video.get('title')
+        description = video.get('description')
+        thumbnail = video.get('main_thumb')
+        duration = int_or_none(video.get('duration'))
+        timestamp = parse_iso8601(video.get('dt_published'), ' ')
+        uploader_id = video.get('user_id')
+        view_count = int_or_none(video.get('views_count'))
+        age_limit = parse_age_limit(video.get('age_limit'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'view_count': view_count,
+            'age_limit': age_limit,
+        }
@@ -17,9 +17,9 @@ from ..utils import (
 class ORFTVthekIE(InfoExtractor):
     IE_NAME = 'orf:tvthek'
     IE_DESC = 'ORF TVthek'
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
         'playlist': [{
             'md5': '2942210346ed779588f428a92db88712',
@@ -32,8 +32,21 @@ class ORFTVthekIE(InfoExtractor):
             'upload_date': '20141208',
         },
     }],
-    'skip': 'Blocked outside of Austria',
-}
+    'skip': 'Blocked outside of Austria / Germany',
+}, {
+    'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+    'playlist': [{
+        'md5': '68f543909aea49d621dfc7703a11cfaf',
+        'info_dict': {
+            'id': '7982259',
+            'ext': 'mp4',
+            'title': 'Best of Ingrid Thurnher',
+            'upload_date': '20140527',
+            'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+        }
+    }],
+    '_skip': 'Blocked outside of Austria / Germany',
+}]

     def _real_extract(self, url):
         playlist_id = self._match_id(url)
@@ -45,7 +58,9 @@ class ORFTVthekIE(InfoExtractor):

         def get_segments(all_data):
             for data in all_data:
-                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
+                if data['name'] in (
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
                     return data['values']['segments']

         sdata = get_segments(all_data)
@@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     unified_strdate,
     US_RATINGS,
 )
@@ -151,6 +152,19 @@ class PBSIE(InfoExtractor):
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)

+        redirect_url = info['alternate_encoding']['url']
+        redirect_info = self._download_json(
+            redirect_url + '?format=json', display_id,
+            'Downloading video url info')
+        if redirect_info['status'] == 'error':
+            if redirect_info['http_code'] == 403:
+                message = (
+                    'The video is not available in your region due to '
+                    'right restrictions')
+            else:
+                message = redirect_info['message']
+            raise ExtractorError(message, expected=True)
+
         rating_str = info.get('rating')
         if rating_str is not None:
             rating_str = rating_str.rpartition('-')[2]
@@ -160,7 +174,7 @@ class PBSIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'title': info['title'],
-            'url': info['alternate_encoding']['url'],
+            'url': redirect_info['url'],
             'ext': 'mp4',
             'description': info['program'].get('description'),
             'thumbnail': info.get('image_url'),
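The shape of the redirect-info check added to PBSIE above, exercised on made-up response dicts (the field names come straight from the hunk; the sample values are invented):

```python
def region_error(redirect_info):
    # Returns an error message for the caller to raise, or None when the
    # redirect info is usable.
    if redirect_info['status'] == 'error':
        if redirect_info['http_code'] == 403:
            return ('The video is not available in your region due to '
                    'right restrictions')
        return redirect_info['message']
    return None

print(region_error({'status': 'ok', 'url': 'http://example.invalid/video.mp4'}))  # None
print(region_error({'status': 'error', 'http_code': 403}))
```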
@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
             'ext': 'flv',
             'title': 'youtube-dl_test_video.mp4',
         },
+        'skip': 'Removed for copyright infringement.',  # oh wow
     }

     def _real_extract(self, url):
@@ -8,11 +8,11 @@ from ..utils import js_to_json


 class RTPIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/e(?P<id>[0-9]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
         'info_dict': {
-            'id': '174042',
+            'id': 'e174042',
             'ext': 'mp3',
             'title': 'Paixões Cruzadas',
             'description': 'As paixões musicais de António Cartaxo e António Macedo',
@@ -21,7 +21,10 @@ class RTPIE(InfoExtractor):
         'params': {
             'skip_download': True,  # RTMP download
         },
-    }
+    }, {
+        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
youtube_dl/extractor/screencastomatic.py (new file, 49 lines)
@@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    ExtractorError,
    js_to_json,
)


class ScreencastOMaticIE(InfoExtractor):
    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        setup_js = self._search_regex(
            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
            webpage, 'setup code')
        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
        try:
            video_data = next(
                m for m in data['modes'] if m.get('type') == 'html5')
        except StopIteration:
            raise ExtractorError('Could not find any video entries!')
        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
        thumbnail = data.get('image')

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'url': video_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }
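Note (not part of the diff): the new extractor feeds the JavaScript object passed to jwplayer('mp4Player').setup() through js_to_json before parsing it. A minimal standalone sketch of that step; the setup object below is invented:

    import json
    from youtube_dl.utils import js_to_json

    setup_js = "{file: 'video.mp4', image: 'thumb.jpg'}"  # hypothetical jwplayer setup object
    data = json.loads(js_to_json(setup_js))  # js_to_json quotes keys and strings so json.loads accepts them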
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
-            r'var slideshare_object = ({.*?}); var user_info =',
+            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
            webpage, 'slideshare object')
        info = json.loads(slideshare_obj)
        if info['slideshow']['type'] != 'video':
@@ -41,7 +41,7 @@ class SlideshareIE(InfoExtractor):
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
+            r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
            'description', fatal=False)

        return {
@@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor):
            'params': {
                'videopassword': 'qwerty',
            },
+            'skip': 'Video is not approved by moderator',
        },
        # age limit + video-password
        {
@@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor):
            },
            'params': {
                'videopassword': '333'
-            }
+            },
+            'skip': 'Video is not approved by moderator',
        },
        # swf player
        {
@@ -1,11 +1,10 @@
# encoding: utf-8
from __future__ import unicode_literals

-import json
import re

from .common import InfoExtractor
-from ..utils import ExtractorError
+from .common import compat_str


class SohuIE(InfoExtractor):
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
                base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
            else:
                base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
-            data_url = base_data_url + str(vid_id)
-            data_json = self._download_webpage(
-                data_url, video_id,
-                note='Downloading JSON data for ' + str(vid_id))
-            return json.loads(data_json)
+            return self._download_json(
+                base_data_url + vid_id, video_id,
+                'Downloading JSON data for %s' % vid_id)

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        mytv = mobj.group('mytv') is not None

        webpage = self._download_webpage(url, video_id)
-        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
-                                            webpage, 'video title')
+        raw_title = self._html_search_regex(
+            r'(?s)<title>(.+?)</title>',
+            webpage, 'video title')
        title = raw_title.partition('-')[0].strip()

-        vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
-                                      'video path')
-        data = _fetch_data(vid, mytv)
-
-        QUALITIES = ('ori', 'super', 'high', 'nor')
-        vid_ids = [data['data'][q + 'Vid']
-                   for q in QUALITIES
-                   if data['data'][q + 'Vid'] != 0]
-        if not vid_ids:
-            raise ExtractorError('No formats available for this video')
-
-        # For now, we just pick the highest available quality
-        vid_id = vid_ids[-1]
-
-        format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
-        part_count = format_data['data']['totalBlocks']
-        allot = format_data['allot']
-        prot = format_data['prot']
-        clipsURL = format_data['data']['clipsURL']
-        su = format_data['data']['su']
+        vid = self._html_search_regex(
+            r'var vid ?= ?["\'](\d+)["\']',
+            webpage, 'video path')
+        vid_data = _fetch_data(vid, mytv)
+
+        formats_json = {}
+        for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+            vid_id = vid_data['data'].get('%sVid' % format_id)
+            if not vid_id:
+                continue
+            vid_id = compat_str(vid_id)
+            formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
+
+        part_count = vid_data['data']['totalBlocks']

        playlist = []
        for i in range(part_count):
-            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
-                        (allot, prot, clipsURL[i], su[i]))
-            part_str = self._download_webpage(
-                part_url, video_id,
-                note='Downloading part %d of %d' % (i + 1, part_count))
-
-            part_info = part_str.split('|')
-            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
-
-            video_info = {
-                'id': '%s_part%02d' % (video_id, i + 1),
+            formats = []
+            for format_id, format_data in formats_json.items():
+                allot = format_data['allot']
+                prot = format_data['prot']
+
+                data = format_data['data']
+                clips_url = data['clipsURL']
+                su = data['su']
+
+                part_str = self._download_webpage(
+                    'http://%s/?prot=%s&file=%s&new=%s' %
+                    (allot, prot, clips_url[i], su[i]),
+                    video_id,
+                    'Downloading %s video URL part %d of %d'
+                    % (format_id, i + 1, part_count))
+
+                part_info = part_str.split('|')
+                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+                formats.append({
+                    'url': video_url,
+                    'format_id': format_id,
+                    'filesize': data['clipsBytes'][i],
+                    'width': data['width'],
+                    'height': data['height'],
+                    'fps': data['fps'],
+                })
+            self._sort_formats(formats)
+
+            playlist.append({
+                'id': '%s_part%d' % (video_id, i + 1),
                'title': title,
-                'url': video_url,
-                'ext': 'mp4',
-            }
-            playlist.append(video_info)
+                'duration': vid_data['data']['clipsDuration'][i],
+                'formats': formats,
+            })

        if len(playlist) == 1:
            info = playlist[0]
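Note (not part of the diff): the rewritten Sohu code asks the metadata endpoint once, then looks up a separate vid for every advertised quality before fetching per-quality data. A simplified sketch of that selection step, using an invented payload:

    QUALITIES = ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k')

    vid_data = {'data': {'norVid': 123, 'highVid': 456, 'superVid': 0}}  # hypothetical response
    quality_vids = {}
    for format_id in QUALITIES:
        vid_id = vid_data['data'].get('%sVid' % format_id)
        if vid_id:  # 0 or a missing key means this quality is not offered
            quality_vids[format_id] = vid_id
    # quality_vids == {'nor': 123, 'high': 456}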
@@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):

        categories = list(data.get('section', {}).get('tags', {}).values())
        asset = data['asset']
+        assets_info = self._download_json(asset['url'], video_id)

        formats = []
-        smil_url = asset['video']
+        smil_url = assets_info['video']
        if '.smil' in smil_url:
            m3u8_url = smil_url.replace('.smil', '.m3u8')
            formats.extend(
@@ -2,7 +2,6 @@
from __future__ import unicode_literals

import re
-import time

from .common import InfoExtractor
from ..compat import (
@@ -40,8 +39,7 @@ class StreamcloudIE(InfoExtractor):
        ''', orig_webpage)
        post = compat_urllib_parse.urlencode(fields)

-        self.to_screen('%s: Waiting for timeout' % video_id)
-        time.sleep(12)
+        self._sleep(12, video_id)
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

-        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
-        description = self._html_search_meta('description', webpage, 'description')
+        title = self._html_search_regex(
+            r'<title>([^<]+)</title>', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
        thumbnail = self._html_search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)

        duration = parse_duration(self._search_regex(
-            r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
+            r'itemprop="duration">\s*(\d+:\d+)\s*<',
+            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._html_search_regex(
-            r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
+            r'class="views">\s*(\d+)\s*<',
+            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._html_search_regex(
-            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+            r'(\d+)</b> Comments?',
+            webpage, 'comment count', fatal=False))

        formats = []
        quality = qualities(['mp4', 'flv'])
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE

class TelecincoIE(MiTeleIE):
    IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
youtube_dl/extractor/teletask.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import unified_strdate


class TeleTaskIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.tele-task.de/archive/video/html5/26168/',
        'info_dict': {
            'title': 'Duplicate Detection',
        },
        'playlist': [{
            'md5': '290ef69fb2792e481169c3958dbfbd57',
            'info_dict': {
                'id': '26168-speaker',
                'ext': 'mp4',
                'title': 'Duplicate Detection',
                'upload_date': '20141218',
            }
        }, {
            'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
            'info_dict': {
                'id': '26168-slides',
                'ext': 'mp4',
                'title': 'Duplicate Detection',
                'upload_date': '20141218',
            }
        }]
    }

    def _real_extract(self, url):
        lecture_id = self._match_id(url)

        webpage = self._download_webpage(url, lecture_id)

        title = self._html_search_regex(
            r'itemprop="name">([^<]+)</a>', webpage, 'title')
        upload_date = unified_strdate(self._html_search_regex(
            r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))

        entries = [{
            'id': '%s-%s' % (lecture_id, format_id),
            'url': video_url,
            'title': title,
            'upload_date': upload_date,
        } for format_id, video_url in re.findall(
            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]

        return self.playlist_result(entries, lecture_id, title)
@@ -8,7 +8,6 @@ class TenPlayIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
    _TEST = {
        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
-        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
        'info_dict': {
            'id': '2695695426001',
            'ext': 'flv',
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
import json

-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
from ..compat import (
    compat_str,
)
@@ -16,7 +16,7 @@ from ..utils import (
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})


-class ThePlatformIE(InfoExtractor):
+class ThePlatformIE(SubtitlesInfoExtractor):
    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -66,6 +66,20 @@ class ThePlatformIE(InfoExtractor):
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)

+        subtitles = {}
+        captions = info.get('captions')
+        if isinstance(captions, list):
+            for caption in captions:
+                lang, src = caption.get('lang'), caption.get('src')
+                if lang and src:
+                    subtitles[lang] = src
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self.extract_subtitles(video_id, subtitles)
+
        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))

@@ -117,6 +131,7 @@ class ThePlatformIE(InfoExtractor):
        return {
            'id': video_id,
            'title': info['title'],
+            'subtitles': subtitles,
            'formats': formats,
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
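Note (not part of the diff): the caption handling added above boils down to a lang -> src mapping taken from the metadata. A self-contained sketch with a made-up payload:

    info = {'captions': [
        {'lang': 'en', 'src': 'http://example.com/en.ttml'},
        {'src': 'http://example.com/no-lang.ttml'},  # skipped: no language code
    ]}  # hypothetical theplatform metadata
    subtitles = {}
    for caption in info.get('captions') or []:
        lang, src = caption.get('lang'), caption.get('src')
        if lang and src:
            subtitles[lang] = src
    # subtitles == {'en': 'http://example.com/en.ttml'}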
@@ -15,7 +15,7 @@ class TMZIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
            'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
-            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
+            'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
        }
    }

@@ -12,7 +12,7 @@ from ..utils import (
class TNAFlixIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'

-    _TITLE_REGEX = None
+    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

@@ -49,8 +49,8 @@ class TNAFlixIE(InfoExtractor):
        if duration:
            duration = parse_duration(duration[1:])

-        cfg_url = self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config')
+        cfg_url = self._proto_relative_url(self._html_search_regex(
+            self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')

        cfg_xml = self._download_xml(
            cfg_url, display_id, note='Downloading metadata',
|
|||||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
|
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
|
||||||
(?:
|
(?:
|
||||||
(?P<channelid>[^/]+)|
|
(?P<channelid>[^/]+)|
|
||||||
|
(?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
|
||||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
||||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
||||||
)
|
)
|
||||||
@@ -70,11 +71,24 @@ class TwitchIE(InfoExtractor):
|
|||||||
def _extract_media(self, item, item_id):
|
def _extract_media(self, item, item_id):
|
||||||
ITEMS = {
|
ITEMS = {
|
||||||
'a': 'video',
|
'a': 'video',
|
||||||
|
'v': 'vod',
|
||||||
'c': 'chapter',
|
'c': 'chapter',
|
||||||
}
|
}
|
||||||
info = self._extract_info(self._download_json(
|
info = self._extract_info(self._download_json(
|
||||||
'%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
'%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
||||||
'Downloading %s info JSON' % ITEMS[item]))
|
'Downloading %s info JSON' % ITEMS[item]))
|
||||||
|
|
||||||
|
if item == 'v':
|
||||||
|
access_token = self._download_json(
|
||||||
|
'%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
|
||||||
|
'Downloading %s access token' % ITEMS[item])
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
|
||||||
|
% (item_id, access_token['token'], access_token['sig']),
|
||||||
|
item_id, 'mp4')
|
||||||
|
info['formats'] = formats
|
||||||
|
return info
|
||||||
|
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
'%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
||||||
'Downloading %s playlist JSON' % ITEMS[item])
|
'Downloading %s playlist JSON' % ITEMS[item])
|
||||||
@@ -209,6 +223,8 @@ class TwitchIE(InfoExtractor):
|
|||||||
"""
|
"""
|
||||||
elif mobj.group('videoid'):
|
elif mobj.group('videoid'):
|
||||||
return self._extract_media('a', mobj.group('videoid'))
|
return self._extract_media('a', mobj.group('videoid'))
|
||||||
|
elif mobj.group('vodid'):
|
||||||
|
return self._extract_media('v', mobj.group('vodid'))
|
||||||
elif mobj.group('channelid'):
|
elif mobj.group('channelid'):
|
||||||
channel_id = mobj.group('channelid')
|
channel_id = mobj.group('channelid')
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
|
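Note (not part of the diff): for the new /v/ VOD URLs the extractor first fetches an access token and then builds the usher m3u8 playlist URL from it. A sketch of that URL construction with invented token values:

    access_token = {'token': '{"vod_id": 1}', 'sig': 'deadbeef'}  # hypothetical API response
    item_id = '1'
    playlist_url = ('http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
                    % (item_id, access_token['token'], access_token['sig']))
    # playlist_url is then handed to _extract_m3u8_formats(playlist_url, item_id, 'mp4')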
youtube_dl/extractor/vier.py (new file, 118 lines)
@@ -0,0 +1,118 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class VierIE(InfoExtractor):
    IE_NAME = 'vier'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        embed_id = mobj.group('embed_id')
        display_id = mobj.group('display_id') or embed_id

        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
        application = self._search_regex(
            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
        filename = self._search_regex(
            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')

        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')

        title = self._og_search_title(webpage, default=display_id)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }


class VierVideosIE(InfoExtractor):
    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        program = mobj.group('program')

        webpage = self._download_webpage(url, program)

        page_id = mobj.group('page')
        if page_id:
            page_id = int(page_id)
            start_page = page_id
            last_page = start_page + 1
            playlist_id = '%s-page%d' % (program, page_id)
        else:
            start_page = 0
            last_page = int(self._search_regex(
                r'videos\?page=(\d+)">laatste</a>',
                webpage, 'last page', default=0)) + 1
            playlist_id = program

        entries = []
        for current_page_id in range(start_page, last_page):
            current_page = self._download_webpage(
                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
                program,
                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
            page_entries = [
                self.url_result('http://www.vier.be' + video_url, 'Vier')
                for video_url in re.findall(
                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
            entries.extend(page_entries)

        return self.playlist_result(entries, playlist_id)
@@ -1,46 +1,42 @@
# encoding: utf-8
from __future__ import unicode_literals

-import re

from .common import InfoExtractor
from ..utils import (
-    parse_iso8601,
-    float_or_none,
    int_or_none,
+    parse_filesize,
+    unified_strdate,
)


class XboxClipsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
+    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
    _TEST = {
        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
        'info_dict': {
            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
            'ext': 'mp4',
-            'title': 'Iabdulelah playing Upload Studio',
-            'filesize_approx': 28101836.8,
-            'timestamp': 1407388500,
+            'title': 'Iabdulelah playing Titanfall',
+            'filesize_approx': 26800000,
            'upload_date': '20140807',
            'duration': 56,
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
-            r'>Link: <a href="([^"]+)">', webpage, 'video URL')
+            r'>(?:Link|Download): <a href="([^"]+)">', webpage, 'video URL')
        title = self._html_search_regex(
            r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
-        timestamp = parse_iso8601(self._html_search_regex(
+        upload_date = unified_strdate(self._html_search_regex(
            r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
-        filesize = float_or_none(self._html_search_regex(
-            r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
+        filesize = parse_filesize(self._html_search_regex(
+            r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
        duration = int_or_none(self._html_search_regex(
            r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
@@ -50,7 +46,7 @@ class XboxClipsIE(InfoExtractor):
            'id': video_id,
            'url': video_url,
            'title': title,
-            'timestamp': timestamp,
+            'upload_date': upload_date,
            'filesize_approx': filesize,
            'duration': duration,
            'view_count': view_count,
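Note (not part of the diff): the switch from a hand-rolled MB regex to parse_filesize lets the page report any unit string. A rough sketch; the exact rounding is up to the helper:

    from youtube_dl.utils import parse_filesize

    filesize = parse_filesize('26.80 MB')  # roughly 26800000, matching the updated test above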
@@ -14,7 +14,7 @@ from ..utils import (

class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
-    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
    _TESTS = [
        {
            'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
                'duration': 200,
                'age_limit': 18,
            }
-        }
+        },
+        {
+            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+            'only_matching': True,
+        },
    ]

    def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):

        video_id = mobj.group('id')
        seo = mobj.group('seo')
-        mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
+        proto = mobj.group('proto')
+        mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
        webpage = self._download_webpage(mrss_url, video_id)

        title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
@@ -1,11 +1,11 @@
from __future__ import unicode_literals

import re
-import json

from .common import InfoExtractor
from ..compat import (
    compat_urllib_request,
+    compat_urllib_parse,
)
from ..utils import (
    parse_duration,
@@ -14,7 +14,7 @@ from ..utils import (


class XTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&]+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&#]+))'
    _TEST = {
        'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
        'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
@@ -30,41 +30,49 @@ class XTubeIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        url = 'http://www.' + mobj.group('url')
+        video_id = self._match_id(url)

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
+        video_title = self._html_search_regex(
+            r'<p class="title">([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
-            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
+            [r"var\s+contentOwnerId\s*=\s*'([^']+)",
+             r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
+            webpage, 'uploader', fatal=False)
        video_description = self._html_search_regex(
-            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
+            r'<p class="fieldsDesc">([^<]+)',
+            webpage, 'description', fatal=False)
        duration = parse_duration(self._html_search_regex(
-            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
-        view_count = self._html_search_regex(
-            r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
-        if view_count:
-            view_count = str_to_int(view_count)
-        comment_count = self._html_search_regex(
-            r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
-        if comment_count:
-            comment_count = str_to_int(comment_count)
+            r'<span class="bold">Runtime:</span> ([^<]+)</p>',
+            webpage, 'duration', fatal=False))
+        view_count = str_to_int(self._html_search_regex(
+            r'<span class="bold">Views:</span> ([\d,\.]+)</p>',
+            webpage, 'view count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<div id="commentBar">([\d,\.]+) Comments</div>',
+            webpage, 'comment count', fatal=False))

-        player_quality_option = json.loads(self._html_search_regex(
-            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
-
-        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
-        formats = [
-            {
-                'url': furl,
+        formats = []
+        for format_id, video_url in re.findall(
+                r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
+            fmt = {
+                'url': compat_urllib_parse.unquote(video_url),
                'format_id': format_id,
-                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
-            } for format_id, furl in player_quality_option.items()
-        ]
+            }
+            m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+            if m:
+                fmt['height'] = int(m.group('height'))
+            formats.append(fmt)
+
+        if not formats:
+            video_url = compat_urllib_parse.unquote(self._search_regex(
+                r'flashvars\.video_url\s*=\s*"([^"]+)"',
+                webpage, 'video URL'))
+            formats.append({'url': video_url})

        self._sort_formats(formats)

        return {
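Note (not part of the diff): the new format discovery simply scans flashvars assignments and derives the height from ids like '720p'. A standalone sketch on an invented page snippet:

    import re

    webpage = 'flashvars.quality_720p = "http%3A%2F%2Fcdn.example.com%2Fv_720.mp4";'  # hypothetical
    formats = []
    for format_id, video_url in re.findall(
            r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
        fmt = {'format_id': format_id, 'url': video_url}
        m = re.search(r'^(?P<height>\d+)[pP]', format_id)
        if m:
            fmt['height'] = int(m.group('height'))
        formats.append(fmt)
    # formats == [{'format_id': '720p', 'url': 'http%3A%2F%2Fcdn.example.com%2Fv_720.mp4', 'height': 720}]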
youtube_dl/extractor/xxxymovies.py (new file, 81 lines)
@@ -0,0 +1,81 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    int_or_none,
)


class XXXYMoviesIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
        'info_dict': {
            'id': '138669',
            'display_id': 'ecstatic-orgasm-sofcore',
            'ext': 'mp4',
            'title': 'Ecstatic Orgasm Sofcore',
            'duration': 931,
            'categories': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')

        title = self._html_search_regex(
            [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
             r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
            webpage, 'title')

        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'([^']+)'",
            webpage, 'thumbnail', fatal=False)

        categories = self._html_search_meta(
            'keywords', webpage, 'categories', default='').split(',')

        duration = parse_duration(self._search_regex(
            r'<span>Duration:</span>\s*(\d+:\d+)',
            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._html_search_regex(
            r'<div class="video_views">\s*(\d+)',
            webpage, 'view count', fatal=False))
        like_count = int_or_none(self._search_regex(
            r'>\s*Likes? <b>\((\d+)\)',
            webpage, 'like count', fatal=False))
        dislike_count = int_or_none(self._search_regex(
            r'>\s*Dislike <b>\((\d+)\)</b>',
            webpage, 'dislike count', fatal=False))

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'age_limit': age_limit,
        }
@@ -12,6 +12,7 @@ from ..compat import (
)
from ..utils import (
    clean_html,
+    unescapeHTML,
    ExtractorError,
    int_or_none,
)
@@ -55,14 +56,14 @@ class YahooIE(InfoExtractor):
            }
        },
        {
-            'url': 'https://tw.screen.yahoo.com/taipei-opinion-poll/選情站報-街頭民調-台北市篇-102823042.html',
-            'md5': '92a7fdd8a08783c68a174d7aa067dde8',
+            'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html',
+            'md5': '3a09cf59349cfaddae1797acc3c087fc',
            'info_dict': {
-                'id': '7a23b569-7bea-36cb-85b9-bd5301a0a1fb',
+                'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
                'ext': 'mp4',
-                'title': '選情站報 街頭民調 台北市篇',
-                'description': '選情站報 街頭民調 台北市篇',
-                'duration': 429,
+                'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
+                'description': '直言台南沒捷運 交通居五都之末',
+                'duration': 396,
            }
        },
        {
@@ -87,14 +88,14 @@ class YahooIE(InfoExtractor):
                'duration': 121,
            }
        }, {
-            'url': 'https://ca.finance.yahoo.com/news/20-most-valuable-brands-world-112600775.html',
-            'md5': '3e401e4eed6325aa29d9b96125fd5b4f',
+            'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
+            'md5': '226a895aae7e21b0129e2a2006fe9690',
            'info_dict': {
-                'id': 'c1b4c09c-8ed8-3b65-8b05-169c55358a83',
+                'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
                'ext': 'mp4',
-                'title': "Apple Is The World's Most Valuable Brand",
-                'description': 'md5:73eabc1a11c6f59752593b2ceefa1262',
-                'duration': 21,
+                'title': '\'The Interview\' TV Spot: War',
+                'description': 'The Interview',
+                'duration': 30,
            }
        }, {
            'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
@@ -116,6 +117,16 @@ class YahooIE(InfoExtractor):
                'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
                'duration': 201,
            }
+        }, {
+            'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+            'md5': '989396ae73d20c6f057746fb226aa215',
+            'info_dict': {
+                'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+                'ext': 'mp4',
+                'title': '\'True Story\' Trailer',
+                'description': 'True Story',
+                'duration': 150,
+            },
        }, {
            'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
            'only_matching': True,
@@ -125,6 +136,7 @@ class YahooIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
+        page_id = mobj.group('id')
        url = mobj.group('url')
        host = mobj.group('host')
        webpage = self._download_webpage(url, display_id)
@@ -149,6 +161,7 @@ class YahooIE(InfoExtractor):
                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
                r'"first_videoid"\s*:\s*"([^"]+)"',
+                r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
            ]
            video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
        else:
@@ -163,17 +176,15 @@ class YahooIE(InfoExtractor):
        region = self._search_regex(
            r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
            webpage, 'region', fatal=False, default='US')
-        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
-                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="%s"'
-                 ' AND protocol="http"' % (video_id, region))
        data = compat_urllib_parse.urlencode({
-            'q': query,
-            'env': 'prod',
-            'format': 'json',
+            'protocol': 'http',
+            'region': region,
        })
+        query_url = (
+            'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
+            '{id}?{data}'.format(id=video_id, data=data))
        query_result = self._download_json(
-            'http://video.query.yahoo.com/v1/public/yql?' + data,
-            display_id, 'Downloading video info')
+            query_url, display_id, 'Downloading video info')

        info = query_result['query']['results']['mediaObj'][0]
        meta = info.get('meta')
@@ -211,7 +222,7 @@ class YahooIE(InfoExtractor):
        return {
            'id': video_id,
            'display_id': display_id,
-            'title': meta['title'],
+            'title': unescapeHTML(meta['title']),
            'formats': formats,
            'description': clean_html(meta['description']),
            'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
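Note (not part of the diff): the YQL query is replaced by a direct call to the sapi streams endpoint, with region and protocol passed as plain query parameters. A sketch of the URL that results (the video id is a placeholder):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    data = urlencode({'protocol': 'http', 'region': 'US'})
    query_url = ('https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
                 '{id}?{data}'.format(id='some-video-id', data=data))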
youtube_dl/extractor/yesjapan.py (new file, 62 lines)
@@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    HEADRequest,
    get_element_by_attribute,
    parse_iso8601,
)


class YesJapanIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
    _TEST = {
        'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
        'md5': 'f0be416314e5be21a12b499b330c21cf',
        'info_dict': {
            'id': '726497834',
            'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
            'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
            'ext': 'mp4',
            'timestamp': 1416391590,
            'upload_date': '20141119',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        video_url = self._og_search_video_url(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        timestamp = None
        submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
        if submit_info:
            timestamp = parse_iso8601(self._search_regex(
                r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))

        # attempt to resolve the final URL in order to get a proper extension
        redirect_req = HEADRequest(video_url)
        req = self._request_webpage(
            redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
        if req:
            video_url = req.geturl()

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
        }
@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
||||||
@@ -418,6 +418,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'upload_date': '20140605',
|
'upload_date': '20140605',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Age-gate video with encrypted signature
|
||||||
|
{
|
||||||
|
'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6kLq3WMV1nU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
|
||||||
|
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
|
||||||
|
'uploader': 'LloydVEVO',
|
||||||
|
'uploader_id': 'LloydVEVO',
|
||||||
|
'upload_date': '20110629',
|
||||||
|
},
|
||||||
|
},
|
||||||
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
||||||
{
|
{
|
||||||
'url': '__2ABJjxzNo',
|
'url': '__2ABJjxzNo',
|
||||||
@@ -478,7 +491,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
id_m = re.match(
|
id_m = re.match(
|
||||||
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
||||||
player_url)
|
player_url)
|
||||||
if not id_m:
|
if not id_m:
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
@@ -527,8 +540,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return 's[%s%s%s]' % (starts, ends, steps)
|
return 's[%s%s%s]' % (starts, ends, steps)
|
||||||
|
|
||||||
step = None
|
step = None
|
||||||
start = '(Never used)' # Quelch pyflakes warnings - start will be
|
# Quelch pyflakes warnings - start will be set when step is set
|
||||||
# set as soon as step is set
|
start = '(Never used)'
|
||||||
for i, prev in zip(idxs[1:], idxs[:-1]):
|
for i, prev in zip(idxs[1:], idxs[:-1]):
|
||||||
if step is not None:
|
if step is not None:
|
||||||
if i - prev == step:
|
if i - prev == step:
|
||||||
@@ -599,24 +612,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
subs_doc = self._download_xml(
|
||||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
||||||
return {}
|
return {}
|
||||||
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
|
||||||
|
|
||||||
sub_lang_list = {}
|
sub_lang_list = {}
|
||||||
for l in lang_list:
|
for track in subs_doc.findall('track'):
|
||||||
lang = l[1]
|
lang = track.attrib['lang_code']
|
||||||
if lang in sub_lang_list:
|
if lang in sub_lang_list:
|
||||||
continue
|
continue
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
||||||
'name': unescapeHTML(l[0]).encode('utf-8'),
|
'name': track.attrib['name'].encode('utf-8'),
|
||||||
})
|
})
|
||||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||||
sub_lang_list[lang] = url
|
sub_lang_list[lang] = url
|
||||||
@@ -649,10 +661,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
list_url = caption_url + '&' + list_params
|
list_url = caption_url + '&' + list_params
|
||||||
caption_list = self._download_xml(list_url, video_id)
|
caption_list = self._download_xml(list_url, video_id)
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr':
|
if original_lang_node is None:
|
||||||
self._downloader.report_warning('Video doesn\'t have automatic captions')
|
self._downloader.report_warning('Video doesn\'t have automatic captions')
|
||||||
return {}
|
return {}
|
||||||
original_lang = original_lang_node.attrib['lang_code']
|
original_lang = original_lang_node.attrib['lang_code']
|
||||||
|
caption_kind = original_lang_node.attrib.get('kind', '')
|
||||||
|
|
||||||
sub_lang_list = {}
|
sub_lang_list = {}
|
||||||
for lang_node in caption_list.findall('target'):
|
for lang_node in caption_list.findall('target'):
|
||||||
@@ -662,7 +675,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'tlang': sub_lang,
|
'tlang': sub_lang,
|
||||||
'fmt': sub_format,
|
'fmt': sub_format,
|
||||||
'ts': timestamp,
|
'ts': timestamp,
|
||||||
'kind': 'asr',
|
'kind': caption_kind,
|
||||||
})
|
})
|
||||||
sub_lang_list[sub_lang] = caption_url + '&' + params
|
sub_lang_list[sub_lang] = caption_url + '&' + params
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
@@ -723,6 +736,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'format_id': format_id,
                 'url': video_url,
                 'width': int_or_none(r.attrib.get('width')),
+                'height': int_or_none(r.attrib.get('height')),
                 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                 'filesize': filesize,
@@ -733,7 +747,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     fo for fo in formats
                     if fo['format_id'] == format_id)
             except StopIteration:
-                f.update(self._formats.get(format_id, {}))
+                f.update(self._formats.get(format_id, {}).items())
                 formats.append(f)
             else:
                 existing_format.update(f)
@@ -766,11 +780,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # this can be viewed without login into Youtube
+            url = proto + '://www.youtube.com/embed/%s' % video_id
+            embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
             data = compat_urllib_parse.urlencode({
                 'video_id': video_id,
                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
                 'sts': self._search_regex(
-                    r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
+                    r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
             })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(
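The hunk above reworks the age-gate path: instead of reading the "sts" token from the watch page, the extractor now fetches the /embed/ page (which is viewable without logging in) and scrapes the token there before calling get_video_info. A hedged sketch of that flow with plain urllib standing in for the extractor's download helpers; the helper name is illustrative, and these 2014-era endpoints may no longer respond the same way today:

    import re
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlencode
        from urllib2 import urlopen

    def build_age_gate_info_url(video_id, proto='https'):
        # Fetch the embed page, pull "sts" out of the player config,
        # then build the get_video_info query, as in the patched code.
        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = urlopen(embed_url).read().decode('utf-8', 'replace')
        m = re.search(r'"sts"\s*:\s*(\d+)', embed_webpage)
        data = urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': m.group(1) if m else '',
        })
        return proto + '://www.youtube.com/get_video_info?' + data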
@@ -968,11 +984,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 elif 's' in url_data:
                     encrypted_sig = url_data['s'][0]
 
-                    if not age_gate:
-                        jsplayer_url_json = self._search_regex(
-                            r'"assets":.+?"js":\s*("[^"]+")',
-                            video_webpage, 'JS player URL')
-                        player_url = json.loads(jsplayer_url_json)
+                    jsplayer_url_json = self._search_regex(
+                        r'"assets":.+?"js":\s*("[^"]+")',
+                        embed_webpage if age_gate else video_webpage, 'JS player URL')
+                    player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
@@ -1026,6 +1041,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 self.report_warning(
                     'Skipping DASH manifest: %r' % e, video_id)
             else:
+                # Hide the formats we found through non-DASH
+                dash_keys = set(df['format_id'] for df in dash_formats)
+                for f in formats:
+                    if f['format_id'] in dash_keys:
+                        f['format_id'] = 'nondash-%s' % f['format_id']
+                        f['preference'] -= 10000
                 formats.extend(dash_formats)
 
         self._sort_formats(formats)
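The added block above keeps both variants when a format id shows up both in the DASH manifest and in the regular format list: the non-DASH copy is renamed and pushed to the bottom of the sort order rather than dropped. A small self-contained sketch of that demotion step; the sample dicts are invented and the real entries carry many more fields:

    def hide_non_dash_duplicates(formats, dash_formats):
        # Rename and demote formats that also appear in the DASH manifest,
        # mirroring the patched block.
        dash_keys = set(df['format_id'] for df in dash_formats)
        for f in formats:
            if f['format_id'] in dash_keys:
                f['format_id'] = 'nondash-%s' % f['format_id']
                f['preference'] = f.get('preference', 0) - 10000
        formats.extend(dash_formats)
        return formats

    formats = [{'format_id': '137', 'preference': 0},
               {'format_id': '22', 'preference': 0}]
    dash_formats = [{'format_id': '137', 'preference': 0}]
    print(hide_non_dash_duplicates(formats, dash_formats))

Note the patch itself does f['preference'] -= 10000 and so assumes the key is already present; the .get() above is only there to keep the toy data short.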
@@ -1128,6 +1149,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         'info_dict': {
             'title': 'JODA7',
         }
+    }, {
+        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+        'info_dict': {
+            'title': 'Uploads from Interstellar Movie',
+        },
+        'playlist_mincout': 21,
     }]
 
     def _real_initialize(self):
@@ -1212,6 +1240,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 'Downloading page #%s' % page_num,
                 transform_source=uppercase_escape)
             content_html = more['content_html']
+            if not content_html.strip():
+                # Some webpages show a "Load more" button but they don't
+                # have more videos
+                break
             more_widget_html = more['load_more_widget_html']
 
         playlist_title = self._html_search_regex(
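The guard added above stops the playlist pager when the page advertises a "Load more" button but the returned content is empty (the buggy playlist covered by the new test entry). A stripped-down sketch of that loop with a stubbed fetcher in place of _download_json; the names and fake responses are illustrative:

    import itertools

    def collect_pages(fetch_more):
        # fetch_more(page_num) -> dict with 'content_html' and
        # 'load_more_widget_html', like the browse_ajax responses.
        pages = []
        for page_num in itertools.count(1):
            more = fetch_more(page_num)
            content_html = more['content_html']
            if not content_html.strip():
                # "Load more" button present, but no more videos.
                break
            pages.append(content_html)
            if not more['load_more_widget_html']:
                break
        return pages

    responses = {
        1: {'content_html': '<li>v1</li>', 'load_more_widget_html': '<button/>'},
        2: {'content_html': '   ', 'load_more_widget_html': '<button/>'},
    }
    print(collect_pages(lambda n: responses[n]))  # ['<li>v1</li>']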
@@ -1555,9 +1587,11 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         feed_entries = []
         paging = 0
         for i in itertools.count(1):
-            info = self._download_json(self._FEED_TEMPLATE % paging,
-                                       '%s feed' % self._FEED_NAME,
-                                       'Downloading page %s' % i)
+            info = self._download_json(
+                self._FEED_TEMPLATE % paging,
+                '%s feed' % self._FEED_NAME,
+                'Downloading page %s' % i,
+                transform_source=uppercase_escape)
             feed_html = info.get('feed_html') or info.get('content_html')
             load_more_widget_html = info.get('load_more_widget_html') or feed_html
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
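The feed request above now runs the response through transform_source=uppercase_escape before JSON parsing, because these endpoints can emit Python-style \UXXXXXXXX escapes that a strict JSON parser rejects. A sketch of that kind of pre-processing step; this is an approximation of the idea and not necessarily the exact youtube-dl helper:

    import codecs
    import json
    import re

    def uppercase_escape(s):
        # Turn \UXXXXXXXX escapes (not valid JSON) into real characters
        # before handing the text to json.loads.
        unicode_escape = codecs.getdecoder('unicode_escape')
        return re.sub(
            r'\\U[0-9a-fA-F]{8}',
            lambda m: unicode_escape(m.group(0))[0],
            s)

    raw = '{"content_html": "Smile \\U0001f600"}'
    print(json.loads(uppercase_escape(raw))['content_html'])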
@@ -1674,3 +1708,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
             ' or simply youtube-dl BaW_jenozKc .',
             expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+    IE_NAME = 'youtube:truncated_id'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+    _TESTS = [{
+        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        raise ExtractorError(
+            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+            expected=True)
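The new YoutubeTruncatedIDIE above catches watch URLs whose video id is shorter than the canonical 11 characters and raises a clear, expected error instead of a confusing download failure. A quick standalone check of that pattern against the new test URL and a full-length id; the second URL is just an example:

    import re

    _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    truncated = 'https://www.youtube.com/watch?v=N_708QY7Ob'   # 10 chars
    complete = 'https://www.youtube.com/watch?v=BaW_jenozKc'   # 11 chars
    print(bool(re.match(_VALID_URL, truncated)))  # True, handled by this IE
    print(bool(re.match(_VALID_URL, complete)))   # False, left to YoutubeIE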
@@ -109,7 +109,7 @@ def parseOpts(overrideArguments=None):
     kw = {
         'version': __version__,
         'formatter': fmt,
-        'usage': '%prog [options] url [url...]',
+        'usage': '%prog [OPTIONS] URL [URL...]',
         'conflict_handler': 'resolve',
     }
 
@@ -8,11 +8,16 @@ from .ffmpeg import (
     FFmpegExtractAudioPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
-    FFmpegVideoConvertor,
+    FFmpegVideoConvertorPP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 
+
+def get_postprocessor(key):
+    return globals()[key + 'PP']
+
+
 __all__ = [
     'AtomicParsleyPP',
     'ExecAfterDownloadPP',
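The new get_postprocessor above gives callers one lookup from a short key (for example 'FFmpegMetadata') to the exported class, relying on the convention that every postprocessor class name ends in 'PP', which is also why FFmpegVideoConvertor gains the suffix in this diff. A toy version of the same registry trick; the classes here are placeholders, not the real postprocessors:

    class FFmpegMetadataPP(object):
        pass

    class XAttrMetadataPP(object):
        pass

    def get_postprocessor(key):
        # The module's globals() act as the registry; a key maps to
        # the '<key>PP' class name.
        return globals()[key + 'PP']

    print(get_postprocessor('FFmpegMetadata'))  # <class '...FFmpegMetadataPP'>
    print(get_postprocessor('XAttrMetadata'))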
@@ -22,6 +27,6 @@ __all__ = [
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
     'FFmpegPostProcessor',
-    'FFmpegVideoConvertor',
+    'FFmpegVideoConvertorPP',
     'XAttrMetadataPP',
 ]
@@ -236,9 +236,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         return self._nopostoverwrites, information
 
 
-class FFmpegVideoConvertor(FFmpegPostProcessor):
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
     def __init__(self, downloader=None, preferedformat=None):
-        super(FFmpegVideoConvertor, self).__init__(downloader)
+        super(FFmpegVideoConvertorPP, self).__init__(downloader)
         self._preferedformat = preferedformat
 
     def run(self, information):
@@ -363,7 +363,7 @@ def encodeArgument(s):
     if not isinstance(s, compat_str):
         # Legacy code that uses byte strings
         # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
         s = s.decode('ascii')
     return encodeFilename(s, True)
 
@@ -464,6 +464,13 @@ class ExtractorError(Exception):
         return ''.join(traceback.format_tb(self.traceback))
 
 
+class UnsupportedError(ExtractorError):
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
+
+
 class RegexNotFoundError(ExtractorError):
     """Error when a regex didn't match"""
     pass
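UnsupportedError, added above (apparently in youtube_dl/utils.py), subclasses ExtractorError, flags itself as an expected failure so no bug-report banner is printed, and keeps the offending URL on the instance. A minimal usage sketch with a stand-in base class, since the real ExtractorError takes more arguments:

    class ExtractorError(Exception):
        def __init__(self, msg, expected=False):
            super(ExtractorError, self).__init__(msg)
            self.expected = expected

    class UnsupportedError(ExtractorError):
        def __init__(self, url):
            super(UnsupportedError, self).__init__(
                'Unsupported URL: %s' % url, expected=True)
            self.url = url

    try:
        raise UnsupportedError('http://example.com/not-a-video')
    except UnsupportedError as err:
        print(err, err.expected, err.url)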
@@ -1262,18 +1269,25 @@ def check_executable(exe, args=[]):
 
 
 def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized='present'):
+                    version_re=None, unrecognized='present'):
     """ Returns the version of the specified executable,
     or False if the executable is not present """
     try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
             [exe] + args,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
     except OSError:
         return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
     if m:
         return m.group(1)
     else:
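Splitting detect_exe_version out of get_exe_version above makes the version-string parsing testable without spawning a process. A small sketch of the parsing half against canned output; the sample strings are made up, the default regex mirrors the patch, and the 'present' fallback assumes the trailing else branch returns the unrecognized marker:

    import re

    def detect_exe_version(output, version_re=None, unrecognized='present'):
        if version_re is None:
            version_re = r'version\s+([-0-9._a-zA-Z]+)'
        m = re.search(version_re, output)
        return m.group(1) if m else unrecognized

    print(detect_exe_version('ffmpeg version 2.5.3 Copyright (c) 2000-2015'))  # '2.5.3'
    print(detect_exe_version('rtmpdump v2.4'))                                 # 'present'
    print(detect_exe_version('rtmpdump v2.4', version_re=r'v([0-9.]+)'))       # '2.4'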
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2014.12.13.1'
+__version__ = '2015.01.03'