Compare commits
156 Commits
2013.11.24
...
2013.12.09
Author | SHA1 | Date | |
---|---|---|---|
|
eaa1a7bde3 | ||
|
0783b09b92 | ||
|
ffe62508e4 | ||
|
ac79fa02b8 | ||
|
7cc3570e53 | ||
|
baa7b1978b | ||
|
ac5118bcb9 | ||
|
5adb818947 | ||
|
52defb0c9b | ||
|
56a8ab7d60 | ||
|
22686b91f0 | ||
|
31812a9e0e | ||
|
11bf848191 | ||
|
d4df5ed14c | ||
|
303b479e0a | ||
|
4c52160646 | ||
|
a213880aaf | ||
|
42d3bf844a | ||
|
b860967ce4 | ||
|
8ca6b8fba1 | ||
|
c4d9e6731a | ||
|
0d9ec5d963 | ||
|
870fc4e578 | ||
|
f623530d6e | ||
|
ca9e02dc00 | ||
|
fb30ec22fd | ||
|
5cc14c2fd7 | ||
|
d349cd2240 | ||
|
0b6a9f639f | ||
|
715c8e7bdb | ||
|
7d4afc557f | ||
|
563e405411 | ||
|
f53c966a73 | ||
|
336c3a69bd | ||
|
4e76179476 | ||
|
ef4fd84857 | ||
|
72135030d1 | ||
|
3514813d5b | ||
|
9e60602084 | ||
|
19e3dfc9f8 | ||
|
a1ef7e85d6 | ||
|
ef2fac6f4a | ||
|
7fc3fa0545 | ||
|
673d1273ff | ||
|
b9a2c53833 | ||
|
e9bf7479d2 | ||
|
bfb9f7bc4c | ||
|
6a656a843a | ||
|
29030c0a4c | ||
|
c0ade33e16 | ||
|
671c0f151d | ||
|
27dcce1904 | ||
|
84db81815a | ||
|
fb7abb31af | ||
|
ce93879a9b | ||
|
938384c587 | ||
|
e9d8e302aa | ||
|
cb7fb54600 | ||
|
cf6758d204 | ||
|
731e3dde29 | ||
|
a0eaa341e1 | ||
|
fb27c2295e | ||
|
1b753cb334 | ||
|
36a826a50d | ||
|
8796857429 | ||
|
aaebed13a8 | ||
|
25939ffe56 | ||
|
5270d8cb13 | ||
|
0037e02921 | ||
|
6ad14cab59 | ||
|
a9be0cc736 | ||
|
55a10eab48 | ||
|
e344693b65 | ||
|
355e4fd07e | ||
|
5e09d6abbd | ||
|
b138de72f2 | ||
|
06dcbb71d8 | ||
|
c5171c454b | ||
|
323ec6ae56 | ||
|
befd88b786 | ||
|
a3fb4675fb | ||
|
5f077efcb1 | ||
|
9986238ba9 | ||
|
e1f900d6a4 | ||
|
acf37ca151 | ||
|
17769d5a6c | ||
|
677c18092d | ||
|
3862402ff3 | ||
|
b03d0d064c | ||
|
d8d6148628 | ||
|
2be54167d0 | ||
|
4e0084d92e | ||
|
fc9e1cc697 | ||
|
f8f60d2793 | ||
|
ea07dbb8b1 | ||
|
2a275ab007 | ||
|
a2e6db365c | ||
|
9d93e7da6c | ||
|
0e44d8381a | ||
|
35907e23ec | ||
|
76d1700b28 | ||
|
dcca796ce4 | ||
|
4b19e38954 | ||
|
5f09bbff4d | ||
|
c1f9c59d11 | ||
|
652cdaa269 | ||
|
e26f871228 | ||
|
6e47b51eef | ||
|
4a98cdbf3b | ||
|
c5ed4e8f7e | ||
|
c2e52508cc | ||
|
d8ec4959c8 | ||
|
d31209a144 | ||
|
529a2e2cc3 | ||
|
781a7d0546 | ||
|
fb04e40396 | ||
|
d9b011f201 | ||
|
b0b9eaa196 | ||
|
8b134b1062 | ||
|
0c75c3fa7a | ||
|
a3927cf7ee | ||
|
1a62c18f65 | ||
|
2a15e7063b | ||
|
d46cc192d7 | ||
|
bb2bebdbe1 | ||
|
5db07df634 | ||
|
ea36cbac5e | ||
|
d0d2b49ab7 | ||
|
31cb6d8fef | ||
|
daa0dd2973 | ||
|
de79c46c8f | ||
|
94ccb6fa2e | ||
|
07e4035879 | ||
|
d0efb9ec9a | ||
|
ac05067d3d | ||
|
113577e155 | ||
|
79d09f47c2 | ||
|
c059bdd432 | ||
|
02dbf93f0e | ||
|
1fb2bcbbf7 | ||
|
16e055849e | ||
|
66cfab4226 | ||
|
6d88bc37a3 | ||
|
b7553b2554 | ||
|
e03db0a077 | ||
|
a1ee09e815 | ||
|
267ed0c5d3 | ||
|
f459d17018 | ||
|
dc65dcbb6d | ||
|
d214fdb8fe | ||
|
138df537ff | ||
|
0c7c19d6bc | ||
|
dca0872056 | ||
|
2b35c9ef74 | ||
|
4894fe8c5b | ||
|
d5a9bb4ea9 |
18
README.md
18
README.md
@@ -30,13 +30,16 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--list-extractors List all supported extractors and the URLs they
|
||||
would handle
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
|
||||
empty string (--proxy "") for direct connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can
|
||||
store downloaded information permanently. By
|
||||
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
||||
/youtube-dl .
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--bidi-workaround Work around terminals that lack bidirectional
|
||||
text support. Requires fribidi executable in PATH
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -55,8 +58,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--dateafter DATE download only videos uploaded after this date
|
||||
--no-playlist download only the currently playing video
|
||||
--age-limit YEARS download only videos suitable for the given age
|
||||
--download-archive FILE Download only videos not present in the archive
|
||||
file. Record all downloaded videos in it.
|
||||
--download-archive FILE Download only videos not listed in the archive
|
||||
file. Record the IDs of all downloaded videos in
|
||||
it.
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
||||
@@ -130,11 +134,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-v, --verbose print various debugging information
|
||||
--dump-intermediate-pages print downloaded pages to debug problems(very
|
||||
verbose)
|
||||
--write-pages Write downloaded pages to files in the current
|
||||
directory
|
||||
--write-pages Write downloaded intermediary pages to files in
|
||||
the current directory to debug problems
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specifiy the order of
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18". "-f mp4"
|
||||
and "-f flv" are also supported
|
||||
--all-formats download all available video formats
|
||||
@@ -182,7 +186,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
|
@@ -1,10 +1,21 @@
|
||||
__youtube_dl()
|
||||
{
|
||||
local cur prev opts
|
||||
local cur prev opts fileopts diropts keywords
|
||||
COMPREPLY=()
|
||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||
prev="${COMP_WORDS[COMP_CWORD-1]}"
|
||||
opts="{{flags}}"
|
||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
||||
fileopts="-a|--batch-file|--download-archive|--cookies"
|
||||
diropts="--cache-dir"
|
||||
|
||||
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||
return 0
|
||||
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [[ ${cur} =~ : ]]; then
|
||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||
|
@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
|
||||
from youtube_dl.utils import preferredencoding
|
||||
|
||||
|
||||
def global_setup():
|
||||
youtube_dl._setup_opener(timeout=10)
|
||||
|
||||
|
||||
def get_params(override=None):
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"parameters.json")
|
||||
|
@@ -39,5 +39,6 @@
|
||||
"writeinfojson": true,
|
||||
"writesubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false
|
||||
"listssubtitles": false,
|
||||
"socket_timeout": 20
|
||||
}
|
||||
|
@@ -6,8 +6,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import global_setup, try_rm
|
||||
global_setup()
|
||||
from test.helper import try_rm
|
||||
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
|
@@ -100,10 +100,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_keywords(self):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
||||
self.assertMatch(':tds', ['ComedyCentral'])
|
||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
||||
self.assertMatch(':cr', ['ComedyCentral'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
|
||||
self.assertMatch(':cr', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from test.helper import (
|
||||
get_params,
|
||||
get_testcases,
|
||||
global_setup,
|
||||
try_rm,
|
||||
md5,
|
||||
report_warning
|
||||
)
|
||||
global_setup()
|
||||
|
||||
|
||||
import hashlib
|
||||
|
@@ -8,21 +8,25 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, global_setup
|
||||
global_setup()
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
VimeoChannelIE,
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
UstreamChannelIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
LivestreamIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
BandcampAlbumIE
|
||||
BandcampAlbumIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE
|
||||
)
|
||||
|
||||
|
||||
@@ -55,6 +59,30 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'Vimeo Tributes')
|
||||
self.assertTrue(len(result['entries']) > 24)
|
||||
|
||||
def test_vimeo_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoUserIE(dl)
|
||||
result = ie.extract('http://vimeo.com/nkistudio/videos')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Nki')
|
||||
self.assertTrue(len(result['entries']) > 65)
|
||||
|
||||
def test_vimeo_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoAlbumIE(dl)
|
||||
result = ie.extract('http://vimeo.com/album/2632481')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Staff Favorites: November 2013')
|
||||
self.assertTrue(len(result['entries']) > 12)
|
||||
|
||||
def test_vimeo_groups(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoGroupsIE(dl)
|
||||
result = ie.extract('http://vimeo.com/groups/rolexawards')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
|
||||
self.assertTrue(len(result['entries']) > 72)
|
||||
|
||||
def test_ustream_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = UstreamChannelIE(dl)
|
||||
@@ -111,6 +139,24 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Nightmare Night EP')
|
||||
self.assertTrue(len(result['entries']) >= 4)
|
||||
|
||||
def test_smotri_community(self):
|
||||
dl = FakeYDL()
|
||||
ie = SmotriCommunityIE(dl)
|
||||
result = ie.extract('http://smotri.com/community/video/kommuna')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'kommuna')
|
||||
self.assertEqual(result['title'], u'КПРФ')
|
||||
self.assertTrue(len(result['entries']) >= 4)
|
||||
|
||||
def test_smotri_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = SmotriUserIE(dl)
|
||||
result = ie.extract('http://smotri.com/user/inspector')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'inspector')
|
||||
self.assertEqual(result['title'], u'Inspector')
|
||||
self.assertTrue(len(result['entries']) >= 9)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -6,8 +6,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, global_setup, md5
|
||||
global_setup()
|
||||
from test.helper import FakeYDL, md5
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'vtt'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
|
||||
def test_youtube_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||
|
@@ -26,6 +26,7 @@ from youtube_dl.utils import (
|
||||
unsmuggle_url,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase):
|
||||
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
|
||||
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
|
||||
def test_str_to_int(self):
|
||||
self.assertEqual(str_to_int('123,456'), 123456)
|
||||
self.assertEqual(str_to_int('123.456'), 123456)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -7,8 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, global_setup, try_rm
|
||||
global_setup()
|
||||
from test.helper import get_params, try_rm
|
||||
|
||||
|
||||
import io
|
||||
|
@@ -7,8 +7,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, global_setup
|
||||
global_setup()
|
||||
from test.helper import get_params
|
||||
|
||||
|
||||
import io
|
||||
@@ -34,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||
|
||||
This is a test video for youtube-dl.
|
||||
|
||||
|
@@ -6,8 +6,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, global_setup
|
||||
global_setup()
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
@@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||
self.assertTrue(len(result) >= 3)
|
||||
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 20)
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -6,9 +6,6 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import global_setup
|
||||
global_setup()
|
||||
|
||||
|
||||
import io
|
||||
import re
|
||||
|
@@ -1,4 +1,3 @@
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@@ -11,6 +10,7 @@ from .utils import (
|
||||
ContentTooShortError,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
sanitize_open,
|
||||
timeconvert,
|
||||
)
|
||||
@@ -53,20 +53,6 @@ class FileDownloader(object):
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_bytes(bytes):
|
||||
if bytes is None:
|
||||
return 'N/A'
|
||||
if type(bytes) is str:
|
||||
bytes = float(bytes)
|
||||
if bytes == 0.0:
|
||||
exponent = 0
|
||||
else:
|
||||
exponent = int(math.log(bytes, 1024.0))
|
||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
||||
converted = float(bytes) / float(1024 ** exponent)
|
||||
return '%.2f%s' % (converted, suffix)
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
@@ -117,7 +103,7 @@ class FileDownloader(object):
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
@@ -218,11 +204,27 @@ class FileDownloader(object):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
@@ -232,14 +234,20 @@ class FileDownloader(object):
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
@@ -260,16 +268,62 @@ class FileDownloader(object):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
return proc.returncode
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
@@ -280,12 +334,11 @@ class FileDownloader(object):
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
@@ -299,30 +352,48 @@ class FileDownloader(object):
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
|
||||
retval = subprocess.call(args)
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -525,7 +596,7 @@ class FileDownloader(object):
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = self.format_bytes(data_len)
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
@@ -7,8 +7,10 @@ import errno
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
@@ -18,6 +20,7 @@ if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .utils import (
|
||||
compat_cookiejar,
|
||||
compat_http_client,
|
||||
compat_print,
|
||||
compat_str,
|
||||
@@ -30,9 +33,12 @@ from .utils import (
|
||||
DownloadError,
|
||||
encodeFilename,
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
@@ -41,9 +47,11 @@ from .utils import (
|
||||
UnavailableVideoError,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
from .version import __version__
|
||||
|
||||
|
||||
class YoutubeDL(object):
|
||||
@@ -118,9 +126,15 @@ class YoutubeDL(object):
|
||||
noplaylist: Download single video instead of a playlist if in doubt.
|
||||
age_limit: An integer representing the user's age in years.
|
||||
Unsuitable videos for the given age are skipped.
|
||||
downloadarchive: File name of a file where all downloads are recorded.
|
||||
download_archive: File name of a file where all downloads are recorded.
|
||||
Videos already present in the file are not downloaded
|
||||
again.
|
||||
cookiefile: File name where cookies should be read from and dumped to.
|
||||
nocheckcertificate:Do not verify SSL certificates
|
||||
proxy: URL of the proxy server to use
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fridibi
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@@ -135,7 +149,7 @@ class YoutubeDL(object):
|
||||
_num_downloads = None
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params):
|
||||
def __init__(self, params=None):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
@@ -144,6 +158,44 @@ class YoutubeDL(object):
|
||||
self._download_retcode = 0
|
||||
self._num_downloads = 0
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
self._err_file = sys.stderr
|
||||
self.params = {} if params is None else params
|
||||
|
||||
# Pipe messsages through fribidi
|
||||
if params.get('bidi_workaround', False):
|
||||
# fribidi does not support ungetting, so force newlines
|
||||
params['progress_with_newline'] = True
|
||||
|
||||
for fid in ['_screen_file', '_err_file']:
|
||||
class FribidiOut(object):
|
||||
def __init__(self, outfile, errfile):
|
||||
self.outfile = outfile
|
||||
self.process = subprocess.Popen(
|
||||
['fribidi'],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=outfile,
|
||||
stderr=errfile)
|
||||
|
||||
def write(self, s):
|
||||
res = self.process.stdin.write(s)
|
||||
self.flush()
|
||||
return res
|
||||
|
||||
def flush(self):
|
||||
return self.process.stdin.flush()
|
||||
|
||||
def isatty(self):
|
||||
return self.outfile.isatty()
|
||||
|
||||
try:
|
||||
vout = FribidiOut(getattr(self, fid), self._err_file)
|
||||
setattr(self, fid, vout)
|
||||
except OSError as ose:
|
||||
if ose.errno == 2:
|
||||
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||
break
|
||||
else:
|
||||
raise
|
||||
|
||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||
@@ -153,14 +205,15 @@ class YoutubeDL(object):
|
||||
u'Assuming --restrict-filenames since file system encoding '
|
||||
u'cannot encode all charactes. '
|
||||
u'Set the LC_ALL environment variable to fix this.')
|
||||
params['restrictfilenames'] = True
|
||||
self.params['restrictfilenames'] = True
|
||||
|
||||
self.params = params
|
||||
self.fd = FileDownloader(self, self.params)
|
||||
|
||||
if '%(stitle)s' in self.params['outtmpl']:
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
self._ies.append(ie)
|
||||
@@ -192,10 +245,14 @@ class YoutubeDL(object):
|
||||
pp.set_downloader(self)
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||
|
||||
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
elif not self.params.get('quiet', False):
|
||||
elif not check_quiet or not self.params.get('quiet', False):
|
||||
terminator = [u'\n', u''][skip_eol]
|
||||
output = message + terminator
|
||||
write_string(output, self._screen_file)
|
||||
@@ -207,9 +264,7 @@ class YoutubeDL(object):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
output = message + u'\n'
|
||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
write_string(output, self._err_file)
|
||||
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
@@ -241,10 +296,9 @@ class YoutubeDL(object):
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.restore_console_title()
|
||||
|
||||
def fixed_template(self):
|
||||
"""Checks if the output template is fixed."""
|
||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save()
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -281,7 +335,7 @@ class YoutubeDL(object):
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = u'WARNING:'
|
||||
@@ -293,7 +347,7 @@ class YoutubeDL(object):
|
||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||
in red if stderr is a tty file.
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;31mERROR:\033[0m'
|
||||
else:
|
||||
_msg_header = u'ERROR:'
|
||||
@@ -392,7 +446,8 @@ class YoutubeDL(object):
|
||||
for key, value in extra_info.items():
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
@@ -428,7 +483,10 @@ class YoutubeDL(object):
|
||||
'webpage_url': url,
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
except ExtractorError as de: # An error we somewhat expected
|
||||
self.report_error(compat_str(de), de.format_traceback())
|
||||
break
|
||||
@@ -461,8 +519,33 @@ class YoutubeDL(object):
|
||||
download,
|
||||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
elif result_type == 'url_transparent':
|
||||
# Use the information from the embedding page
|
||||
info = self.extract_info(
|
||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info, download=False, process=False)
|
||||
|
||||
def make_result(embedded_info):
|
||||
new_result = ie_result.copy()
|
||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||
'subtitles', 'annotations', 'format',
|
||||
'thumbnail', 'thumbnails'):
|
||||
if f in new_result:
|
||||
del new_result[f]
|
||||
if f in embedded_info:
|
||||
new_result[f] = embedded_info[f]
|
||||
return new_result
|
||||
new_result = make_result(info)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
if new_result.get('_type') == 'compat_list':
|
||||
new_result['entries'] = [
|
||||
make_result(e) for e in new_result['entries']]
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
@@ -653,22 +736,22 @@ class YoutubeDL(object):
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
compat_print(info_dict['fulltitle'])
|
||||
self.to_stdout(info_dict['fulltitle'])
|
||||
if self.params.get('forceid', False):
|
||||
compat_print(info_dict['id'])
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
# For RTMP URLs, also include the playpath
|
||||
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
compat_print(info_dict['thumbnail'])
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
compat_print(info_dict['description'])
|
||||
self.to_stdout(info_dict['description'])
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
compat_print(filename)
|
||||
self.to_stdout(filename)
|
||||
if self.params.get('forceformat', False):
|
||||
compat_print(info_dict['format'])
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
compat_print(json.dumps(info_dict))
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -782,13 +865,15 @@ class YoutubeDL(object):
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
if len(url_list) > 1 and self.fixed_template():
|
||||
if (len(url_list) > 1 and
|
||||
'%' not in self.params['outtmpl']
|
||||
and self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(self.params['outtmpl'])
|
||||
|
||||
for url in url_list:
|
||||
try:
|
||||
#It also downloads the videos
|
||||
videos = self.extract_info(url)
|
||||
self.extract_info(url)
|
||||
except UnavailableVideoError:
|
||||
self.report_error(u'unable to download video')
|
||||
except MaxDownloadsReached:
|
||||
@@ -820,20 +905,26 @@ class YoutubeDL(object):
|
||||
except (IOError, OSError):
|
||||
self.report_warning(u'Unable to remove downloaded video file')
|
||||
|
||||
def in_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
if fn is None:
|
||||
return False
|
||||
extractor = info_dict.get('extractor_id')
|
||||
def _make_archive_id(self, info_dict):
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = info_dict.get('extractor_key')
|
||||
if extractor is None:
|
||||
if 'id' in info_dict:
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return None # Incomplete video information
|
||||
return extractor.lower() + u' ' + info_dict['id']
|
||||
|
||||
def in_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
if fn is None:
|
||||
return False
|
||||
|
||||
vid_id = self._make_archive_id(info_dict)
|
||||
if vid_id is None:
|
||||
return False # Incomplete video information
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = extractor.lower()
|
||||
vid_id = extractor + u' ' + info_dict['id']
|
||||
|
||||
try:
|
||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||
for line in archive_file:
|
||||
@@ -848,12 +939,15 @@ class YoutubeDL(object):
|
||||
fn = self.params.get('download_archive')
|
||||
if fn is None:
|
||||
return
|
||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
||||
vid_id = self._make_archive_id(info_dict)
|
||||
assert vid_id
|
||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||
archive_file.write(vid_id + u'\n')
|
||||
|
||||
@staticmethod
|
||||
def format_resolution(format, default='unknown'):
|
||||
if format.get('vcodec') == 'none':
|
||||
return 'audio only'
|
||||
if format.get('_resolution') is not None:
|
||||
return format['_resolution']
|
||||
if format.get('height') is not None:
|
||||
@@ -867,10 +961,11 @@ class YoutubeDL(object):
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def format_note(fdict):
|
||||
if fdict.get('format_note') is not None:
|
||||
return fdict['format_note']
|
||||
res = u''
|
||||
if fdict.get('vcodec') is not None:
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + u' '
|
||||
if (fdict.get('vcodec') is not None and
|
||||
fdict.get('vcodec') != 'none'):
|
||||
res += u'%-5s' % fdict['vcodec']
|
||||
elif fdict.get('vbr') is not None:
|
||||
res += u'video'
|
||||
@@ -886,25 +981,103 @@ class YoutubeDL(object):
|
||||
res += 'audio'
|
||||
if fdict.get('abr') is not None:
|
||||
res += u'@%3dk' % fdict['abr']
|
||||
if fdict.get('filesize') is not None:
|
||||
if res:
|
||||
res += u', '
|
||||
res += format_bytes(fdict['filesize'])
|
||||
return res
|
||||
|
||||
def line(format):
|
||||
return (u'%-20s%-10s%-12s%s' % (
|
||||
def line(format, idlen=20):
|
||||
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
format_note(format),
|
||||
)
|
||||
)
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
formats_s = list(map(line, formats))
|
||||
idlen = max(len(u'format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [line(f, idlen) for f in formats]
|
||||
if len(formats) > 1:
|
||||
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': u'format code', 'ext': u'extension',
|
||||
'_resolution': u'resolution', 'format_note': u'note'})
|
||||
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
|
||||
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
return self._opener.open(req)
|
||||
|
||||
def print_debug_header(self):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||
except:
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
pass
|
||||
write_string(u'[debug] Python version %s - %s' %
|
||||
(platform.python_version(), platform_name()) + u'\n')
|
||||
|
||||
proxy_map = {}
|
||||
for handler in self._opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
proxy_map.update(handler.proxies)
|
||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||
|
||||
def _setup_opener(self):
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
|
||||
opts_cookiefile = self.params.get('cookiefile')
|
||||
opts_proxy = self.params.get('proxy')
|
||||
|
||||
if opts_cookiefile is None:
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load()
|
||||
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
||||
self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
if opts_proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||
else:
|
||||
proxies = compat_urllib_request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
https_handler = make_HTTPS_handler(
|
||||
self.params.get('nocheckcertificate', False))
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
self._opener = opener
|
||||
|
||||
# TODO remove this global modification
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(timeout)
|
||||
|
@@ -36,50 +36,41 @@ __authors__ = (
|
||||
'Marcin Cieślak',
|
||||
'Anton Larionov',
|
||||
'Takuya Tsuchida',
|
||||
'Sergey M.',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import collections
|
||||
import getpass
|
||||
import optparse
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shlex
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
import platform
|
||||
|
||||
|
||||
from .utils import (
|
||||
compat_cookiejar,
|
||||
compat_print,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
DateRange,
|
||||
decodeOption,
|
||||
determine_ext,
|
||||
DownloadError,
|
||||
get_cachedir,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
SameFileError,
|
||||
std_headers,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
from .update import update_self
|
||||
from .version import __version__
|
||||
from .FileDownloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
from .version import __version__
|
||||
from .YoutubeDL import YoutubeDL
|
||||
from .PostProcessor import (
|
||||
FFmpegMetadataPP,
|
||||
@@ -90,11 +81,11 @@ from .PostProcessor import (
|
||||
|
||||
|
||||
def parseOpts(overrideArguments=None):
|
||||
def _readOptions(filename_bytes):
|
||||
def _readOptions(filename_bytes, default=[]):
|
||||
try:
|
||||
optionf = open(filename_bytes)
|
||||
except IOError:
|
||||
return [] # silently skip if file is not present
|
||||
return default # silently skip if file is not present
|
||||
try:
|
||||
res = []
|
||||
for l in optionf:
|
||||
@@ -200,7 +191,9 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--extractor-descriptions',
|
||||
action='store_true', dest='list_extractor_descriptions',
|
||||
help='Output descriptions of all supported extractors', default=False)
|
||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||
general.add_option(
|
||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
@@ -208,6 +201,12 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
general.add_option(
|
||||
'--socket-timeout', dest='socket_timeout',
|
||||
type=float, default=None, help=optparse.SUPPRESS_HELP)
|
||||
general.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
||||
|
||||
|
||||
selection.add_option('--playlist-start',
|
||||
@@ -216,7 +215,9 @@ def parseOpts(overrideArguments=None):
|
||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
||||
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||
dest='max_downloads', type=int, default=None,
|
||||
help='Abort after downloading NUMBER files')
|
||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||
@@ -228,7 +229,7 @@ def parseOpts(overrideArguments=None):
|
||||
default=None, type=int)
|
||||
selection.add_option('--download-archive', metavar='FILE',
|
||||
dest='download_archive',
|
||||
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
||||
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
|
||||
|
||||
|
||||
authentication.add_option('-u', '--username',
|
||||
@@ -243,7 +244,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default='best',
|
||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
@@ -325,7 +326,7 @@ def parseOpts(overrideArguments=None):
|
||||
help='print downloaded pages to debug problems(very verbose)')
|
||||
verbosity.add_option('--write-pages',
|
||||
action='store_true', dest='write_pages', default=False,
|
||||
help='Write downloaded pages to files in the current directory')
|
||||
help='Write downloaded intermediary pages to files in the current directory to debug problems')
|
||||
verbosity.add_option('--youtube-print-sig-code',
|
||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
@@ -423,6 +424,8 @@ def parseOpts(overrideArguments=None):
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||
else:
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
|
||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
if xdg_config_home:
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||
@@ -432,8 +435,31 @@ def parseOpts(overrideArguments=None):
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||
if not os.path.isfile(userConfFile):
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile)
|
||||
userConf = _readOptions(userConfFile, None)
|
||||
|
||||
if userConf is None:
|
||||
appdata_dir = os.environ.get('appdata')
|
||||
if appdata_dir:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||
default=None)
|
||||
if userConf is None:
|
||||
userConf = _readOptions(
|
||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||
default=None)
|
||||
|
||||
if userConf is None:
|
||||
userConf = []
|
||||
|
||||
commandLineConf = sys.argv[1:]
|
||||
argv = systemConf + userConf + commandLineConf
|
||||
opts, args = parser.parse_args(argv)
|
||||
@@ -452,19 +478,6 @@ def _real_main(argv=None):
|
||||
|
||||
parser, opts, args = parseOpts(argv)
|
||||
|
||||
# Open appropriate CookieJar
|
||||
if opts.cookiefile is None:
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
try:
|
||||
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
|
||||
if os.access(opts.cookiefile, os.R_OK):
|
||||
jar.load()
|
||||
except (IOError, OSError) as err:
|
||||
if opts.verbose:
|
||||
traceback.print_exc()
|
||||
write_string(u'ERROR: unable to open cookie file\n')
|
||||
sys.exit(101)
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
@@ -496,8 +509,6 @@ def _real_main(argv=None):
|
||||
all_urls = batchurls + args
|
||||
all_urls = [url.strip() for url in all_urls]
|
||||
|
||||
opener = _setup_opener(jar=jar, opts=opts)
|
||||
|
||||
extractors = gen_extractors()
|
||||
|
||||
if opts.list_extractors:
|
||||
@@ -552,7 +563,7 @@ def _real_main(argv=None):
|
||||
if opts.retries is not None:
|
||||
try:
|
||||
opts.retries = int(opts.retries)
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid retry count specified')
|
||||
if opts.buffersize is not None:
|
||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||
@@ -563,13 +574,13 @@ def _real_main(argv=None):
|
||||
opts.playliststart = int(opts.playliststart)
|
||||
if opts.playliststart <= 0:
|
||||
raise ValueError(u'Playlist start must be positive')
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid playlist start number specified')
|
||||
try:
|
||||
opts.playlistend = int(opts.playlistend)
|
||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid playlist end number specified')
|
||||
if opts.extractaudio:
|
||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||
@@ -672,34 +683,15 @@ def _real_main(argv=None):
|
||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||
'age_limit': opts.age_limit,
|
||||
'download_archive': opts.download_archive,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'proxy': opts.proxy,
|
||||
'socket_timeout': opts.socket_timeout,
|
||||
'bidi_workaround': opts.bidi_workaround,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||
except:
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
pass
|
||||
write_string(u'[debug] Python version %s - %s' %
|
||||
(platform.python_version(), platform_name()) + u'\n')
|
||||
|
||||
proxy_map = {}
|
||||
for handler in opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
proxy_map.update(handler.proxies)
|
||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||
|
||||
ydl.print_debug_header()
|
||||
ydl.add_default_info_extractors()
|
||||
|
||||
# PostProcessors
|
||||
@@ -730,46 +722,9 @@ def _real_main(argv=None):
|
||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||
retcode = 101
|
||||
|
||||
# Dump cookie jar if requested
|
||||
if opts.cookiefile is not None:
|
||||
try:
|
||||
jar.save()
|
||||
except (IOError, OSError):
|
||||
sys.exit(u'ERROR: unable to save cookie jar')
|
||||
|
||||
sys.exit(retcode)
|
||||
|
||||
|
||||
def _setup_opener(jar=None, opts=None, timeout=300):
|
||||
if opts is None:
|
||||
FakeOptions = collections.namedtuple(
|
||||
'FakeOptions', ['proxy', 'no_check_certificate'])
|
||||
opts = FakeOptions(proxy=None, no_check_certificate=False)
|
||||
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
if opts.proxy is not None:
|
||||
if opts.proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
||||
else:
|
||||
proxies = compat_urllib_request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
https_handler = make_HTTPS_handler(opts)
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(timeout)
|
||||
return opener
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
try:
|
||||
_real_main(argv)
|
||||
|
@@ -8,6 +8,7 @@ from .arte import (
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVDDCIE,
|
||||
)
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@@ -20,9 +21,11 @@ from .c56 import C56IE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cnn import CNNIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
@@ -54,7 +57,7 @@ from .flickr import FlickrIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
FranceTvInfoIE,
|
||||
France2IE,
|
||||
FranceTVIE,
|
||||
GenerationQuoiIE
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
@@ -70,6 +73,7 @@ from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .hypem import HypemIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .imdb import ImdbIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
@@ -99,13 +103,16 @@ from .nbc import NBCNewsIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
@@ -117,6 +124,11 @@ from .rutube import RutubeIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE,
|
||||
)
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||
from .southparkstudios import (
|
||||
@@ -135,6 +147,7 @@ from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .thisav import ThisAVIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -155,7 +168,13 @@ from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .vimeo import VimeoIE, VimeoChannelIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
)
|
||||
from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
@@ -163,12 +182,17 @@ from .wat import WatIE
|
||||
from .websurg import WeBSurgIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeIE
|
||||
from .yahoo import YahooIE, YahooSearchIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
YahooNewsIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
@@ -185,6 +209,7 @@ from .youtube import (
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'AddAnime'
|
||||
_TEST = {
|
||||
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
|
||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||
webpage, u'key')
|
||||
|
||||
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||
key)
|
||||
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
||||
|
||||
video_title = config_xml.find('title').text
|
||||
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||
u"playlist": [
|
||||
@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
})
|
||||
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
|
||||
|
||||
info = {
|
||||
playlist.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'user_agent': 'QuickTime compatible (youtube-dl)',
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info['url'] = formats[-1]['url']
|
||||
info['ext'] = formats[-1]['ext']
|
||||
|
||||
playlist.append(info)
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_TEST = {
|
||||
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
||||
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
||||
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
for f in formats:
|
||||
f['ext'] = determine_ext(f['url'])
|
||||
|
||||
info = {
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': data.get('misc', {}).get('image'),
|
||||
}
|
||||
thumbnail = data.get('misc', {}).get('image')
|
||||
if thumbnail:
|
||||
info['thumbnail'] = thumbnail
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
|
||||
return info
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -11,6 +10,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
get_element_by_id,
|
||||
compat_str,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
@@ -18,8 +18,8 @@ from ..utils import (
|
||||
# add tests.
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
@@ -78,8 +78,7 @@ class ArteTvIE(InfoExtractor):
|
||||
"""Extract from videos.arte.tv"""
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
||||
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||
@@ -109,9 +108,8 @@ class ArteTvIE(InfoExtractor):
|
||||
"""Extract from http://liveweb.arte.tv/"""
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||
video_id, u'Downloading information')
|
||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
event_doc = config_doc.find('event')
|
||||
url_node = event_doc.find('video').find('urlHd')
|
||||
if url_node is None:
|
||||
@@ -145,7 +143,9 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
||||
self.report_extraction(video_id)
|
||||
info = json.loads(json_info)
|
||||
@@ -260,3 +260,35 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
webpage = self._download_webpage(url, anchor_id)
|
||||
row = get_element_by_id(anchor_id, webpage)
|
||||
return self._extract_from_webpage(row, anchor_id, lang)
|
||||
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
    """Extractor for episode pages hosted on ddc.arte.tv.

    The first path component of the URL ('emission' or 'folge') is captured
    as the ``lang`` group and mapped to 'fr' / 'de' before the JSON
    description of the video is handed to ``_extract_from_json_url``.
    """
    IE_NAME = u'arte.tv:ddc'
    # 'https?' accepts both http and https; the previous 'http?' made the
    # final "p" optional instead and therefore never matched https URLs.
    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'

    _TEST = {
        u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
        u'file': u'049881-009_PLUS7-D.flv',
        u'info_dict': {
            u'title': u'Mit offenen Karten',
            u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
            u'upload_date': u'20131207',
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info for the video embedded in the page at *url*.

        The page embeds a script inside the element with class
        'visu_video_block'; that script carries a ``json_url=`` parameter
        pointing at the JSON description of the video.
        """
        # NOTE(review): _extract_url_info is not defined in this class;
        # presumably inherited from ArteTVPlus7IE - confirm.
        video_id, lang = self._extract_url_info(url)
        # Translate the URL path keyword into a language code.
        if lang == 'folge':
            lang = 'de'
        elif lang == 'emission':
            lang = 'fr'
        webpage = self._download_webpage(url, video_id)
        scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
        script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
        javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
        json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
        return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
@@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
|
||||
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
|
||||
u'uploader': u'pixelversity',
|
||||
u'uploader_id': u'344706',
|
||||
},
|
||||
u'params': {
|
||||
# It doesn't respect the 'Range' header, it would download the whole video
|
||||
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
|
||||
u'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -49,7 +54,7 @@ class BambuserIE(InfoExtractor):
|
||||
|
||||
class BambuserChannelIE(InfoExtractor):
|
||||
IE_NAME = u'bambuser:channel'
|
||||
_VALID_URL = r'http://bambuser.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
||||
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
||||
# The maximum number we can get with each request
|
||||
_STEP = 50
|
||||
|
||||
|
@@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
|
||||
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
if urlp.path.startswith('/play/'):
|
||||
request = compat_urllib_request.Request(url)
|
||||
response = compat_urllib_request.urlopen(request)
|
||||
response = self._request_webpage(url, None, False)
|
||||
redirecturl = response.geturl()
|
||||
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
||||
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
||||
@@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
self.report_extraction(mobj.group(1))
|
||||
info = None
|
||||
try:
|
||||
urlh = compat_urllib_request.urlopen(request)
|
||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||
basename = url.split('/')[-1]
|
||||
title,ext = os.path.splitext(basename)
|
||||
title = title.decode('UTF-8')
|
||||
ext = ext.replace('.', '')
|
||||
self.report_direct_download(title)
|
||||
info = {
|
||||
'id': title,
|
||||
'url': url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'urlhandle': urlh
|
||||
}
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
||||
urlh = self._request_webpage(request, None, False,
|
||||
u'unable to download video info webpage')
|
||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||
basename = url.split('/')[-1]
|
||||
title,ext = os.path.splitext(basename)
|
||||
title = title.decode('UTF-8')
|
||||
ext = ext.replace('.', '')
|
||||
self.report_direct_download(title)
|
||||
info = {
|
||||
'id': title,
|
||||
'url': url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'urlhandle': urlh
|
||||
}
|
||||
if info is None: # Regular URL
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
|
@@ -76,18 +76,21 @@ class BrightcoveIE(InfoExtractor):
|
||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||
}
|
||||
def find_param(name):
|
||||
return find_xpath_attr(object_doc, './param', 'name', name)
|
||||
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||
if node is not None:
|
||||
return node.attrib['value']
|
||||
return None
|
||||
playerKey = find_param('playerKey')
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey.attrib['value']
|
||||
params['playerKey'] = playerKey
|
||||
# The three fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||
if videoPlayer is not None:
|
||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
params['linkBaseURL'] = linkBase.attrib['value']
|
||||
params['linkBaseURL'] = linkBase
|
||||
data = compat_urllib_parse.urlencode(params)
|
||||
return cls._FEDERATED_URL_TEMPLATE % data
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||
info_page = self._download_webpage(info_url,video_id,
|
||||
doc = self._download_xml(info_url,video_id,
|
||||
u'Downloading video info')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
|
||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||
infos = video_info.find('INFOS')
|
||||
media = video_info.find('MEDIA')
|
||||
|
58
youtube_dl/extractor/clipfish.py
Normal file
58
youtube_dl/extractor/clipfish.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
    """Information extractor for video pages on clipfish.de."""
    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        u'file': u'3966754.mp4',
        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
            u'title': u'FIFA 14 - E3 2013 Trailer',
            u'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }

    def _real_extract(self, url):
        """Download the videoinfo XML for *url* and build the info dict.

        Returns a dict with 'id', 'title', 'url', 'thumbnail' and
        'duration' (whole seconds, or None when the duration string is
        missing or not in H:MM:SS:ms form).

        Raises ExtractorError when the document carries no video URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The current Unix time is sent as the 'ts' query parameter
        # (presumably to defeat caching - not verifiable from here).
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
        title = doc.find('title').text
        # findtext() yields None for a missing element and '' for an empty
        # one, so both cases end in the ExtractorError below instead of an
        # AttributeError on a None element.
        video_url = doc.findtext('filename')
        if not video_url:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError(u'Cannot find video URL in document %r' %
                                 xml_bytes)
        thumbnail = doc.findtext('imageurl') or None
        duration_str = doc.findtext('duration') or u''
        m = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if m:
            # The trailing 'ms' field is deliberately ignored.
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
52
youtube_dl/extractor/clipsyndicate.py
Normal file
52
youtube_dl/extractor/clipsyndicate.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
    """Information extractor for clipsyndicate.com video pages."""
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
        u'info_dict': {
            u'id': u'4629301',
            u'ext': u'mp4',
            u'title': u'Brick Briscoe',
            u'duration': 612,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The embed player script is downloaded first because its 'flvars'
        string is needed as the query of the playlist request; the
        playlist XML then provides title, media location, thumbnail and
        duration of the track.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, u'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

        playlist_page = self._download_webpage(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info')
        # Fix broken xml: escape every ampersand that does not already
        # start a predefined or numeric character reference, so that
        # well-formed entities are not escaped twice.
        playlist_page = re.sub(
            r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#[xX][0-9A-Fa-f]+);)',
            '&amp;', playlist_page)
        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the 'value' attribute of the named <param>, or None."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        # A missing duration yields None instead of a TypeError from int().
        duration = find_param('duration')

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': int(duration) if duration is not None else None,
        }
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
|
||||
path = mobj.group('path')
|
||||
page_title = mobj.group('title')
|
||||
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
||||
info_xml = self._download_webpage(info_url, page_title)
|
||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
for f in info.findall('files/file'):
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
||||
mdoc = self._download_xml(xmlUrl, video_id,
|
||||
u'Downloading info XML',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
try:
|
||||
videoNode = mdoc.findall('./video')[0]
|
||||
youtubeIdNode = videoNode.find('./youtubeID')
|
||||
@@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
adoc = self._download_xml(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError:
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@@ -11,7 +11,31 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ComedyCentralIE(InfoExtractor):
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
    """Extractor for comedycentral.com video-clips, episodes and cc-studios pages.

    The page is searched for a ``data-mgid`` attribute, and the mgid found
    there is passed to ``_get_videos_info``.
    """
    # Dots in the host name are escaped so that they only match a literal '.'
    _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'

    _TEST = {
        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
        u'md5': u'4167875aae411f903b751a21f357f1ee',
        u'info_dict': {
            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            u'ext': u'mp4',
            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and return the info for its mgid."""
        mobj = re.match(self._VALID_URL, url)
        # The URL tail is only used as the display id while downloading.
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
                                  webpage, u'mgid')
        # NOTE(review): _get_videos_info is not defined in this class;
        # presumably provided by MTVServicesInfoExtractor - confirm.
        return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(InfoExtractor):
|
||||
IE_DESC = u'The Daily Show / Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow or :colbert
|
||||
# urls for episodes like:
|
||||
@@ -127,13 +151,12 @@ class ComedyCentralIE(InfoExtractor):
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||
indexXml = self._download_webpage(indexUrl, epTitle,
|
||||
idoc = self._download_xml(indexUrl, epTitle,
|
||||
u'Downloading show index',
|
||||
u'unable to download episode index')
|
||||
|
||||
results = []
|
||||
|
||||
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
||||
itemEls = idoc.findall('.//item')
|
||||
for partNum,itemEl in enumerate(itemEls):
|
||||
mediaId = itemEl.findall('./guid')[0].text
|
||||
@@ -144,10 +167,9 @@ class ComedyCentralIE(InfoExtractor):
|
||||
|
||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||
configXml = self._download_webpage(configUrl, epTitle,
|
||||
cdoc = self._download_xml(configUrl, epTitle,
|
||||
u'Downloading configuration for %s' % shortMediaId)
|
||||
|
||||
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
@@ -169,7 +191,7 @@ class ComedyCentralIE(InfoExtractor):
|
||||
})
|
||||
|
||||
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
|
||||
info = {
|
||||
results.append({
|
||||
'id': shortMediaId,
|
||||
'formats': formats,
|
||||
'uploader': showId,
|
||||
@@ -177,11 +199,6 @@ class ComedyCentralIE(InfoExtractor):
|
||||
'title': effTitle,
|
||||
'thumbnail': None,
|
||||
'description': compat_str(officialTitle),
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(info['formats'][-1])
|
||||
|
||||
results.append(info)
|
||||
})
|
||||
|
||||
return results
|
||||
|
@@ -4,11 +4,11 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class InfoExtractor(object):
|
||||
"""Information Extractor class.
|
||||
|
||||
@@ -54,6 +55,9 @@ class InfoExtractor(object):
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
comment_count: Number of comments on the video
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
@@ -75,6 +79,7 @@ class InfoExtractor(object):
|
||||
* acodec Name of the audio codec in use
|
||||
* vbr Average video bitrate in KBit/s
|
||||
* vcodec Name of the video codec in use
|
||||
* filesize The number of bytes, if known in advance
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
@@ -149,27 +154,38 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return type(self).__name__[:-2]
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
""" Returns the response handle """
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
elif note is not False:
|
||||
self.to_screen(u'%s: %s' % (video_id, note))
|
||||
if video_id is None:
|
||||
self.to_screen(u'%s' % (note,))
|
||||
else:
|
||||
self.to_screen(u'%s: %s' % (video_id, note))
|
||||
try:
|
||||
return compat_urllib_request.urlopen(url_or_request)
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if errnote is None:
|
||||
errnote = u'Unable to download webpage'
|
||||
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
|
||||
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||
else:
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||
@@ -204,9 +220,20 @@ class InfoExtractor(object):
|
||||
content = webpage_bytes.decode(encoding, 'replace')
|
||||
return (content, urlh)
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
""" Returns the data of the page as a string """
|
||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
if res is False:
|
||||
return res
|
||||
else:
|
||||
content, _ = res
|
||||
return content
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def to_screen(self, msg):
|
||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||
@@ -356,7 +383,8 @@ class InfoExtractor(object):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
|
||||
r'''(?ix)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
|
||||
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
|
||||
u'file': u'315139.flv',
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -29,17 +28,16 @@ class DaumIE(InfoExtractor):
|
||||
video_id = mobj.group(1)
|
||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||
webpage = self._download_webpage(canonical_url, video_id)
|
||||
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
||||
full_id = self._search_regex(
|
||||
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
||||
webpage, u'full id')
|
||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||
info_xml = self._download_webpage(
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
u'Downloading video info')
|
||||
urls_xml = self._download_webpage(
|
||||
urls = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||
video_id, u'Downloading video formats info')
|
||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
||||
|
||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||
formats = []
|
||||
@@ -49,10 +47,9 @@ class DaumIE(InfoExtractor):
|
||||
'vid': full_id,
|
||||
'profile': profile,
|
||||
})
|
||||
url_xml = self._download_webpage(
|
||||
url_doc = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||
video_id, note=False)
|
||||
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
|
||||
format_url = url_doc.find('result/url').text
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
@@ -60,7 +57,7 @@ class DaumIE(InfoExtractor):
|
||||
'format_id': profile,
|
||||
})
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('TITLE').text,
|
||||
'formats': formats,
|
||||
@@ -69,6 +66,3 @@ class DaumIE(InfoExtractor):
|
||||
'duration': int(info.find('DURATION').text),
|
||||
'upload_date': info.find('REGDTTM').text[:8],
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -12,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.webm',
|
||||
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
||||
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
||||
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
@@ -67,7 +65,7 @@ class DreiSatIE(InfoExtractor):
|
||||
return (qidx, prefer_http, format['video_bitrate'])
|
||||
formats.sort(key=_sortkey)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@@ -78,8 +76,3 @@ class DreiSatIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
|
||||
return info
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
config_xml = self._download_webpage(
|
||||
config = self._download_xml(
|
||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
video_url = config.find('file').text
|
||||
|
||||
return {
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class EightTracksIE(InfoExtractor):
|
||||
IE_NAME = '8tracks'
|
||||
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_TEST = {
|
||||
u"name": u"EightTracks",
|
||||
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
|
@@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor):
|
||||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/eh359',
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -10,7 +9,7 @@ from ..utils import (
|
||||
|
||||
class FazIE(InfoExtractor):
|
||||
IE_NAME = u'faz.net'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
||||
u'config xml url')
|
||||
config_xml = self._download_webpage(config_xml_url, video_id,
|
||||
config = self._download_xml(config_xml_url, video_id,
|
||||
u'Downloading config xml')
|
||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
|
||||
encodings = config.find('ENCODINGS')
|
||||
formats = []
|
||||
@@ -46,13 +44,10 @@ class FazIE(InfoExtractor):
|
||||
})
|
||||
|
||||
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'description': descr,
|
||||
'thumbnail': config.find('STILL/STILL_BIG').text,
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class FKTVIE(InfoExtractor):
|
||||
IE_NAME = u'fernsehkritik.tv'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://fernsehkritik.tv/folge-1',
|
||||
@@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor):
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
IE_NAME = u'fernsehkritik.tv:postecke'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||
_TEST = {
|
||||
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
||||
u'file': u'0120.flv',
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,11 +10,10 @@ from ..utils import (
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_video(self, video_id):
|
||||
xml_desc = self._download_webpage(
|
||||
info = self._download_xml(
|
||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||
'getInfosOeuvre.php?id-diffusion='
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||
@@ -23,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
thumbnail_path = info.find('image').text
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||
'url': video_url,
|
||||
'title': info.find('titre').text,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
@@ -47,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
@@ -68,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
class France2IE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'france2.fr'
|
||||
_VALID_URL = r'''(?x)https?://www\.france2\.fr/
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = u'francetv'
|
||||
IE_DESC = u'France 2, 3, 4, 5 and Ô'
|
||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/.*?/videos/(?P<id>\d+)
|
||||
| emission/(?P<key>[^/?]+)
|
||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||
| (emissions?|jt)/(?P<key>[^/?]+)
|
||||
)'''
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
u'file': u'75540104.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'13h15, le samedi...',
|
||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
_TESTS = [
|
||||
# france2
|
||||
{
|
||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
u'file': u'75540104.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'13h15, le samedi...',
|
||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
# france3
|
||||
{
|
||||
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
u'info_dict': {
|
||||
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Le scandale du prix des médicaments',
|
||||
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
}
|
||||
# france4
|
||||
{
|
||||
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
u'info_dict': {
|
||||
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Hero Corp Making of - Extrait 1',
|
||||
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
u'info_dict': {
|
||||
u'id': u'92837968',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'C à dire ?!',
|
||||
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
u'info_dict': {
|
||||
u'id': u'92327925',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Infô-Afrique',
|
||||
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
},
|
||||
u'params': {
|
||||
# m3u8 download
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'The id changes frequently',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('key'):
|
||||
webpage = self._download_webpage(url, mobj.group('key'))
|
||||
video_id = self._html_search_regex(
|
||||
r'''(?x)<div\s+class="video-player">\s*
|
||||
id_res = [
|
||||
(r'''(?x)<div\s+class="video-player">\s*
|
||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||
class="francetv-video-player">''',
|
||||
webpage, u'video ID')
|
||||
class="francetv-video-player">'''),
|
||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||
'\.fr/\?id-video=([^"/&]+)'),
|
||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||
]
|
||||
video_id = self._html_search_regex(id_res, webpage, u'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_video(video_id)
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||
u'file': u'20130811.mp4',
|
||||
|
@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
|
||||
'format_id': q,
|
||||
})
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
||||
'formats': formats,
|
||||
'description': get_meta_content('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@@ -1,13 +1,10 @@
|
||||
import re
|
||||
|
||||
from .mtv import MTVIE, _media_xml_tag
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
class GametrailersIE(MTVIE):
|
||||
"""
|
||||
Gametrailers use the same videos system as MTVIE, it just changes the feed
|
||||
url, where the uri is and the method to get the thumbnails.
|
||||
"""
|
||||
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||
|
||||
class GametrailersIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
||||
@@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
|
||||
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
||||
},
|
||||
}
|
||||
# Overwrite MTVIE properties we don't want
|
||||
_TESTS = []
|
||||
|
||||
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||
return itemdoc.find(search_path).attrib['url']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
@@ -169,8 +169,13 @@ class GenericIE(InfoExtractor):
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, u'video title',
|
||||
default=u'video')
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
@@ -188,13 +193,35 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
match = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': unescapeHTML(match.group('url')),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
@@ -209,7 +236,7 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit: JWPlayer JS loader
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
|
||||
if mobj is None:
|
||||
# Try to find twitter cards info
|
||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
@@ -236,18 +263,11 @@ class GenericIE(InfoExtractor):
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
|
||||
# here's a fun little line of code for you:
|
||||
video_extension = os.path.splitext(video_id)[1][1:]
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||
url, u'video uploader')
|
||||
|
||||
return [{
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
}]
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
u'file': u'1435540.mp3',
|
||||
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
u'info_dict': {
|
||||
u"title": u"Freddie Gibbs - Lay It Down"
|
||||
u"title": u'Freddie Gibbs "Lay It Down"'
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
|
||||
class OneUPIE(IGNIE):
|
||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
||||
|
||||
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||
|
57
youtube_dl/extractor/imdb.py
Normal file
57
youtube_dl/extractor/imdb.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
|
||||
|
||||
class ImdbIE(InfoExtractor):
    """Information extractor for trailers hosted on imdb.com."""

    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
        u'info_dict': {
            u'id': u'2524815897',
            u'ext': u'mp4',
            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The video page is scanned for ``case '<format id>' : url = '<path>'``
        entries; each path is fetched and the JSON embedded in its
        ``imdb-player-data`` script tag supplies the media URL and the
        thumbnail ("slate") for that format.

        Returns a dict with the keys 'id', 'title', 'formats',
        'description' and 'thumbnail'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)

        # Each match is a (format id, relative path) pair.
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
            flags=re.MULTILINE)

        formats = []
        # Initialised up front so the return statement below does not hit an
        # unbound local when the page lists no formats at all.
        thumbnail = None
        for f_id, f_path in available_formats:
            f_path = f_path.strip()
            # The second positional argument is the video id and the third is
            # the progress note; the note must not take the id's place.
            format_page = self._download_webpage(
                compat_urlparse.urljoin(url, f_path),
                video_id, u'Downloading info for %s format' % f_id)
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, u'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
            })
            # Keep the slate of the most recent format that provides one.
            thumbnail = format_info.get('slate') or thumbnail

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': thumbnail,
        }
|
@@ -3,7 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
|
||||
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
|
||||
_TEST = {
|
||||
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
u'file': u'aye83DjauH.mp4',
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
video_id = query_dic['publishedid'][0]
|
||||
url = self._build_url(query)
|
||||
|
||||
flashconfiguration_xml = self._download_webpage(url, video_id,
|
||||
flashconfiguration = self._download_xml(url, video_id,
|
||||
u'Downloading flash configuration')
|
||||
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
|
||||
file_url = flashconfiguration.find('file').text
|
||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||
# Replace some of the parameters in the query to get the best quality
|
||||
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||
lambda m: self._clean_query(m.group()),
|
||||
file_url)
|
||||
info_xml = self._download_webpage(file_url, video_id,
|
||||
info = self._download_xml(file_url, video_id,
|
||||
u'Downloading video info')
|
||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||
item = info.find('channel/item')
|
||||
|
||||
def _bp(p):
|
||||
|
@@ -2,7 +2,6 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, u'video ID')
|
||||
|
||||
xml_config = self._download_webpage(
|
||||
config = self._download_xml(
|
||||
xml_link, title, u'Downloading XML config')
|
||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||
info_json = self._search_regex(
|
||||
r'(?sm)<format\.json>(.*?)</format\.json>',
|
||||
xml_config, u'JSON information')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
||||
|
@@ -1,7 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
|
||||
archive_id = m.group(1)
|
||||
|
||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||
chapter_info_xml = self._download_webpage(api, chapter_id,
|
||||
doc = self._download_xml(api, chapter_id,
|
||||
note=u'Downloading chapter information',
|
||||
errnote=u'Chapter information download failed')
|
||||
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
|
||||
for a in doc.findall('.//archive'):
|
||||
if archive_id == a.find('./id').text:
|
||||
break
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||
IE_NAME = u'liveleak'
|
||||
_TEST = {
|
||||
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -12,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = u'livestream'
|
||||
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
u'file': u'4719370.mp4',
|
||||
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
user = mobj.group('user')
|
||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||
|
||||
api_response = self._download_webpage(api_url, video_id)
|
||||
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
|
||||
info = self._download_xml(api_url, video_id)
|
||||
item = info.find('channel').find('item')
|
||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||
|
@@ -1,11 +1,8 @@
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_parse_qs,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
@@ -69,6 +66,21 @@ class MetacafeIE(InfoExtractor):
|
||||
u'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# cbs video
|
||||
{
|
||||
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
|
||||
u'info_dict': {
|
||||
u'id': u'0rOxMBabDXN6',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
|
||||
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
|
||||
u'duration': 129,
|
||||
},
|
||||
u'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -78,12 +90,8 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
def _real_initialize(self):
|
||||
# Retrieve disclaimer
|
||||
request = compat_urllib_request.Request(self._DISCLAIMER)
|
||||
try:
|
||||
self.report_disclaimer()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
|
||||
self.report_disclaimer()
|
||||
self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
|
||||
|
||||
# Confirm age
|
||||
disclaimer_form = {
|
||||
@@ -92,11 +100,8 @@ class MetacafeIE(InfoExtractor):
|
||||
}
|
||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
self.report_age_confirmation()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(request, None, False, u'Unable to confirm age')
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id and simplified title from URL
|
||||
@@ -106,10 +111,16 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
video_id = mobj.group(1)
|
||||
|
||||
# Check if video comes from YouTube
|
||||
mobj2 = re.match(r'^yt-(.*)$', video_id)
|
||||
if mobj2 is not None:
|
||||
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
|
||||
# the video may come from an external site
|
||||
m_external = re.match('^(\w{2})-(.*)$', video_id)
|
||||
if m_external is not None:
|
||||
prefix, ext_id = m_external.groups()
|
||||
# Check if video comes from YouTube
|
||||
if prefix == 'yt':
|
||||
return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
|
||||
# CBS videos use theplatform.com
|
||||
if prefix == 'cb':
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
|
@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
|
||||
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
||||
webpage, u'description', flags=re.DOTALL)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': clip.find('title').text,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'duration': int(clip.find('duration').text),
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@@ -1,13 +1,10 @@
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,9 +28,11 @@ class MixcloudIE(InfoExtractor):
|
||||
"""Returns 1st active url from list"""
|
||||
for url in url_list:
|
||||
try:
|
||||
compat_urllib_request.urlopen(url)
|
||||
# We only want to know if the request succeed
|
||||
# don't download the whole file
|
||||
self._request_webpage(url, None, False)
|
||||
return url
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
|
||||
except ExtractorError:
|
||||
url = None
|
||||
|
||||
return None
|
||||
@@ -60,7 +59,7 @@ class MixcloudIE(InfoExtractor):
|
||||
'title': info['name'],
|
||||
'url': final_song_url,
|
||||
'ext': 'mp3',
|
||||
'description': info['description'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info['pictures'].get('extra_large'),
|
||||
'uploader': info['user']['name'],
|
||||
'uploader_id': info['user']['username'],
|
||||
|
@@ -10,35 +10,8 @@ from ..utils import (
|
||||
def _media_xml_tag(tag):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||
|
||||
class MTVIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
||||
|
||||
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||
u'file': u'853555.mp4',
|
||||
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
|
||||
u'info_dict': {
|
||||
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'add_ie': ['Vevo'],
|
||||
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||
u'file': u'USCJY1331283.mp4',
|
||||
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||
u'info_dict': {
|
||||
u'title': u'Everything Has Changed',
|
||||
u'upload_date': u'20130606',
|
||||
u'uploader': u'Taylor Swift',
|
||||
},
|
||||
u'skip': u'VEVO is only available in some countries',
|
||||
},
|
||||
]
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def _id_from_uri(uri):
|
||||
return uri.split(':')[-1]
|
||||
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
|
||||
return base + m.group('finalid')
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
return 'http://mtv.mtvnimages.com/uri/' + uri
|
||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||
thumb_node = itemdoc.find(search_path)
|
||||
if thumb_node is None:
|
||||
return None
|
||||
else:
|
||||
return thumb_node.attrib['url']
|
||||
|
||||
def _extract_video_formats(self, metadataXml):
|
||||
if '/error_country_block.swf' in metadataXml:
|
||||
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
|
||||
else:
|
||||
description = None
|
||||
|
||||
info = {
|
||||
return {
|
||||
'title': itemdoc.find('title').text,
|
||||
'formats': self._extract_video_formats(mediagen_page),
|
||||
'id': video_id,
|
||||
@@ -101,19 +79,46 @@ class MTVIE(InfoExtractor):
|
||||
'description': description,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(info['formats'][-1])
|
||||
|
||||
return info
|
||||
|
||||
def _get_videos_info(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
||||
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
||||
u'Downloading info')
|
||||
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
||||
|
||||
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||
u'file': u'853555.mp4',
|
||||
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
|
||||
u'info_dict': {
|
||||
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'add_ie': ['Vevo'],
|
||||
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||
u'file': u'USCJY1331283.mp4',
|
||||
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||
u'info_dict': {
|
||||
u'title': u'Everything Has Changed',
|
||||
u'upload_date': u'20130606',
|
||||
u'uploader': u'Taylor Swift',
|
||||
},
|
||||
u'skip': u'VEVO is only available in some countries',
|
||||
},
|
||||
]
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
return 'http://mtv.mtvnimages.com/uri/' + uri
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import os.path
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -10,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MySpassIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.myspass.de/.*'
|
||||
_VALID_URL = r'http://www\.myspass\.de/.*'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||
u'file': u'11741.mp4',
|
||||
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
|
||||
|
||||
# get metadata
|
||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||
metadata_text = self._download_webpage(metadata_url, video_id)
|
||||
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
|
||||
metadata = self._download_xml(metadata_url, video_id)
|
||||
|
||||
# extract values from metadata
|
||||
url_flv_el = metadata.find('url_flv')
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
|
||||
'protocol': 'p2p',
|
||||
'inKey': key,
|
||||
})
|
||||
info_xml = self._download_webpage(
|
||||
info = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||
video_id, u'Downloading video info')
|
||||
urls_xml = self._download_webpage(
|
||||
urls = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||
video_id, u'Downloading video formats info')
|
||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
||||
|
||||
formats = []
|
||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||
@@ -59,7 +56,7 @@ class NaverIE(InfoExtractor):
|
||||
'height': int(format_el.find('height').text),
|
||||
})
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('Subject').text,
|
||||
'formats': formats,
|
||||
@@ -68,6 +65,3 @@ class NaverIE(InfoExtractor):
|
||||
'upload_date': info.find('WriteDate').text.replace('.', ''),
|
||||
'view_count': int(info.find('PlayCount').text),
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||
})
|
||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||
path_response = self._download_webpage(path_url, video_id,
|
||||
path_doc = self._download_xml(path_url, video_id,
|
||||
u'Downloading final video url')
|
||||
path_doc = xml.etree.ElementTree.fromstring(path_response)
|
||||
video_url = path_doc.find('path').text
|
||||
|
||||
join = compat_urlparse.urljoin
|
||||
|
@@ -2,7 +2,6 @@
|
||||
|
||||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
|
||||
# the cookies in order to be able to download the info webpage
|
||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
|
||||
video_info_webpage = self._download_webpage(
|
||||
video_info = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
note=u'Downloading video info page')
|
||||
|
||||
@@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
|
||||
video_title = video_info.find('.//title').text
|
||||
video_extension = video_info.find('.//movie_type').text
|
||||
video_format = video_extension.upper()
|
||||
@@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
|
||||
video_uploader = video_uploader_id
|
||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||
try:
|
||||
user_info_webpage = self._download_webpage(
|
||||
user_info = self._download_xml(
|
||||
url, video_id, note=u'Downloading user information')
|
||||
video_uploader = user_info.find('.//nickname').text
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
||||
else:
|
||||
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
|
||||
video_uploader = user_info.find('.//nickname').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
43
youtube_dl/extractor/ninegag.py
Normal file
43
youtube_dl/extractor/ninegag.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages whose player is backed by YouTube."""

    IE_NAME = '9gag'
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
        u"url": u"http://9gag.tv/v/1912",
        u"file": u"1912.mp4",
        u"info_dict": {
            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
        u'add_ie': [u'Youtube']
    }

    def _real_extract(self, url):
        """Build a ``url_transparent`` result from the page's embedded metadata.

        The page carries a JSON blob in the ``data-video-meta`` attribute of
        the ``tv-video`` div; title, description, counters and thumbnail are
        read from it, and the ``youtubeVideoId`` value is handed over with
        ``ie_key`` set to ``Youtube``.
        """
        clip_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, clip_id)

        # _html_search_regex is used (rather than _search_regex) so that the
        # attribute value comes back as text json.loads can parse
        # -- presumably it undoes HTML entity escaping; verify in common.py.
        raw_meta = self._html_search_regex(r'''(?x)
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
            data-video-meta="([^"]+)"''', page, u'video metadata')
        meta = json.loads(raw_meta)

        result = {
            '_type': 'url_transparent',
            'ie_key': 'Youtube',
            'id': clip_id,
        }
        # Filled in the same order the metadata keys were originally read.
        result['url'] = meta['youtubeVideoId']
        result['title'] = meta['title']
        result['description'] = meta['description']
        result['view_count'] = int(meta['view_count'])
        result['like_count'] = int(meta['statistic']['like'])
        result['dislike_count'] = int(meta['statistic']['dislike'])
        result['thumbnail'] = meta['thumbnail_url']
        return result
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class ORFIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
|
49
youtube_dl/extractor/podomatic.py
Normal file
49
youtube_dl/extractor/podomatic.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PodomaticIE(InfoExtractor):
    """Extractor for single podcast entries hosted on ``*.podomatic.com``."""

    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

    _TEST = {
        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
        u"file": u"2009-01-02T16_03_35-08_00.mp3",
        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
        u"info_dict": {
            u"uploader": u"Science Teaching Tips",
            u"uploader_id": u"scienceteachingtips",
            u"title": u"64. When the Moon Hits Your Eye",
            u"duration": 446,
        }
    }

    def _real_extract(self, url):
        """Fetch the entry's ``embed_params`` JSON and map it to an info dict.

        The channel (sub-domain) doubles as ``uploader_id``; the request is
        made with the same protocol the input URL used.
        """
        match = re.match(self._VALID_URL, url)
        entry_id = match.group('id')
        channel_name = match.group('channel')

        params_template = ('%s://%s.podomatic.com/entry/embed_params/%s' +
                           '?permalink=true&rtmp=0')
        params_url = params_template % (
            match.group('proto'), channel_name, entry_id)
        params = json.loads(self._download_webpage(
            params_url, entry_id, note=u'Downloading video info'))

        media_url = params['downloadLink']
        podcast_name = params['podcast']
        entry_title = params['title']
        image_url = params['imageLocation']
        # 'length' is divided by 1000 to obtain the duration
        # -- presumably the API reports milliseconds; TODO confirm.
        seconds = int(params['length'] / 1000.0)

        return {
            'id': entry_id,
            'url': media_url,
            'title': entry_title,
            'uploader': podcast_name,
            'uploader_id': channel_name,
            'thumbnail': image_url,
            'duration': seconds,
        }
|
51
youtube_dl/extractor/pyvideo.py
Normal file
51
youtube_dl/extractor/pyvideo.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PyvideoIE(InfoExtractor):
    """Extractor for pyvideo.org talk pages (YouTube-backed or direct file)."""

    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
    _TESTS = [{
        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
        u'file': u'24_4WWkSmNo.mp4',
        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
        u'info_dict': {
            u"title": u"Become a logging expert in 30 minutes",
            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
            u"upload_date": u"20130320",
            u"uploader": u"NextDayVideo",
            u"uploader_id": u"NextDayVideo",
        },
        u'add_ie': ['Youtube'],
    },
    {
        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
        u'info_dict': {
            u'id': u'2542',
            u'ext': u'm4v',
            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
        },
    },
    ]

    def _real_extract(self, url):
        """Return a YouTube url result if the page links one, else a direct file.

        When no YouTube watch link is present, the title is taken from the
        first ``<h3>`` inside the "section" div and the media URL from a
        ``<source>`` tag or, failing that, the "Download" link.
        """
        talk_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, talk_id)

        youtube_link = re.search(
            r'(https?://www\.youtube\.com/watch\?v=.*)', page)
        if youtube_link is None:
            raw_title = self._html_search_regex(
                r'<div class="section">.*?<h3>([^>]+?)</h3>',
                page, u'title', flags=re.DOTALL)
            source_patterns = [
                r'<source src="(.*?)"',
                r'<dt>Download</dt>.*?<a href="(.+?)"',
            ]
            media_url = self._search_regex(
                source_patterns, page, u'video url', flags=re.DOTALL)
            # The on-page heading can end in a file extension (see the second
            # test case), so only the stem is used as the title.
            stem, _ = os.path.splitext(raw_title)
            return {
                'id': talk_id,
                'title': stem,
                'url': media_url,
            }

        return self.url_result(youtube_link.group(1), 'Youtube')
|
@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
|
||||
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
|
||||
webpage, u'title')
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RutubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rutube.ru/video/(?P<long_id>\w+)'
|
||||
_VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
|
||||
|
||||
def _extract_video(self, video_id):
|
||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
|
||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||
video_id, u'Downloading video url')
|
||||
image_page = self._download_webpage(
|
||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||
video_id, u'Downloading thumbnail info')
|
||||
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
|
||||
|
||||
return {'id': video_id,
|
||||
'url': url_doc.find('./durl/url').text,
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
_VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Ooyala'],
|
||||
|
251
youtube_dl/extractor/smotri.py
Normal file
251
youtube_dl/extractor/smotri.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
    """Extractor for single videos on smotri.com.

    Supports password-protected videos (via the ``videopassword`` downloader
    parameter) and videos hidden behind an adult-content confirmation page.
    """

    IE_DESC = u'Smotri.com'
    IE_NAME = u'smotri'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'

    _TESTS = [
        # real video id 2610366
        {
            u'url': u'http://smotri.com/video/view/?id=v261036632ab',
            u'file': u'v261036632ab.mp4',
            u'md5': u'2a7b08249e6f5636557579c368040eb9',
            u'info_dict': {
                u'title': u'катастрофа с камер видеонаблюдения',
                u'uploader': u'rbc2008',
                u'uploader_id': u'rbc08',
                u'upload_date': u'20131118',
                u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
                u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
            },
        },
        # real video id 57591
        {
            u'url': u'http://smotri.com/video/view/?id=v57591cb20',
            u'file': u'v57591cb20.flv',
            u'md5': u'830266dfc21f077eac5afd1883091bcd',
            u'info_dict': {
                u'title': u'test',
                u'uploader': u'Support Photofile@photofile',
                u'uploader_id': u'support-photofile',
                u'upload_date': u'20070704',
                u'description': u'test, видео test',
                u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
            },
        },
        # video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
            u'file': u'v1390466a13c.mp4',
            u'md5': u'f6331cef33cad65a0815ee482a54440b',
            u'info_dict': {
                u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
                u'uploader': u'timoxa40',
                u'uploader_id': u'timoxa40',
                u'upload_date': u'20100404',
                u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
                u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
            },
            u'params': {
                u'videopassword': u'qwerty',
            },
        },
        # age limit + video-password
        {
            u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
            u'file': u'v15408898bcf.flv',
            u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
            u'info_dict': {
                u'title': u'этот ролик не покажут по ТВ',
                u'uploader': u'zzxxx',
                u'uploader_id': u'ueggb',
                u'upload_date': u'20101001',
                u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
                u'age_limit': 18,
                u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
            },
            u'params': {
                u'videopassword': u'333'
            }
        }
    ]

    # Values of the 'status' field in the vt.php JSON response, as they are
    # compared against in _real_extract.
    _SUCCESS = 0
    _PASSWORD_NOT_VERIFIED = 1
    _PASSWORD_DETECTED = 2
    _VIDEO_NOT_FOUND = 3

    def _search_meta(self, name, html, display_name=None):
        """Return the content of ``<meta itemprop="NAME" ...>`` from *html*.

        The lookup is non-fatal (``fatal=False``), so callers must cope with
        a missing tag; *display_name* defaults to *name* and is only used for
        reporting.
        """
        if display_name is None:
            display_name = name
        # The former trailing `return self._html_search_meta(...)` could never
        # run because it followed this return; it has been removed.
        return self._html_search_regex(
            r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
            html, display_name, fatal=False)

    def _real_extract(self, url):
        """Extract the media URL and metadata for one smotri.com video.

        Raises ExtractorError when the video does not exist, when a password
        is required but missing or wrong, or when the JSON endpoint reports
        an unknown status.  Every piece of page-derived metadata is optional:
        a missing meta tag yields ``None`` for that field instead of a crash.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        real_video_id = mobj.group('realvideoid')

        # Download video JSON data
        video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
        video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
        video_json = json.loads(video_json_page)

        status = video_json['status']
        if status == self._VIDEO_NOT_FOUND:
            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
        elif status == self._PASSWORD_DETECTED:
            # The video is protected by a password, retry with
            # video-password set
            video_password = self._downloader.params.get('videopassword', None)
            if not video_password:
                raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
            video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
            video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
            video_json = json.loads(video_json_page)
            status = video_json['status']
            if status == self._PASSWORD_NOT_VERIFIED:
                raise ExtractorError(u'Video password is invalid', expected=True)

        if status != self._SUCCESS:
            raise ExtractorError(u'Unexpected status value %s' % status)

        # Extract the URL of the video
        video_url = video_json['file_data']

        # Video JSON does not provide enough meta data
        # We will extract some from the video web page instead
        video_page_url = 'http://' + mobj.group('url')
        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')

        # Adult content
        adult_content = re.search(u'EroConfirmText">', video_page) is not None
        if adult_content:
            self.report_age_confirmation()
            confirm_string = self._html_search_regex(
                r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
                video_page, u'confirm string')
            confirm_url = video_page_url + '&confirm=%s' % confirm_string
            video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')

        # Extract the rest of meta data
        video_title = self._search_meta(u'name', video_page, u'title')
        if not video_title:
            # Fall back to the last path component of the media URL
            video_title = video_url.rsplit('/', 1)[-1]

        video_description = self._search_meta(u'description', video_page)
        # _search_meta is non-fatal: only trim the boilerplate when a
        # description was actually found.
        if video_description is not None:
            END_TEXT = u' на сайте Smotri.com'
            if video_description.endswith(END_TEXT):
                video_description = video_description[:-len(END_TEXT)]
            START_TEXT = u'Смотреть онлайн ролик '
            if video_description.startswith(START_TEXT):
                video_description = video_description[len(START_TEXT):]
        video_thumbnail = self._search_meta(u'thumbnail', video_page)

        video_upload_date = None
        upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
        if upload_date_str is not None:
            upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
            if upload_date_m:
                video_upload_date = (
                    upload_date_m.group('year') +
                    upload_date_m.group('month') +
                    upload_date_m.group('day'))

        video_duration = None
        duration_str = self._search_meta(u'duration', video_page)
        if duration_str is not None:
            duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
            if duration_m:
                video_duration = (
                    (int(duration_m.group('hours')) * 60 * 60) +
                    (int(duration_m.group('minutes')) * 60) +
                    int(duration_m.group('seconds')))

        video_uploader = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
            video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)

        video_uploader_id = self._html_search_regex(
            u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
            video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)

        video_view_count = self._html_search_regex(
            u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
            video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
        # The pattern only captures digits, so the conversion cannot fail;
        # counts are numeric in the other extractors' results as well.
        if video_view_count is not None:
            video_view_count = int(video_view_count)

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            # 'duration' is the key other extractors use for the length in
            # seconds; 'video_duration' is kept for backward compatibility.
            'duration': video_duration,
            'video_duration': video_duration,
            'view_count': video_view_count,
            'age_limit': 18 if adult_content else 0,
            'video_page_url': video_page_url
        }
|
||||
|
||||
|
||||
class SmotriCommunityIE(InfoExtractor):
    """Playlist extractor for the videos of a smotri.com community."""

    IE_DESC = u'Smotri.com community videos'
    IE_NAME = u'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Return a playlist built from the community's RSS export.

        Each ``<link>`` of the feed's items becomes a url result handled by
        the ``Smotri`` extractor; the playlist title is parsed out of the
        channel description.
        """
        community_id = re.match(self._VALID_URL, url).group('communityid')

        feed_url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
        feed = self._download_xml(feed_url, community_id, u'Downloading community RSS')

        entries = []
        for link_node in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_node.text, 'Smotri'))

        channel_description = feed.find('./channel/description').text
        title = self._html_search_regex(
            u'^Видео сообщества "([^"]+)"$', channel_description, u'community title')

        return self.playlist_result(entries, community_id, title)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
    """Playlist extractor for the videos uploaded by a smotri.com user."""

    IE_DESC = u'Smotri.com user videos'
    IE_NAME = u'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'

    def _real_extract(self, url):
        """Return a playlist built from the user's RSS export.

        Each ``<link>`` of the feed's items becomes a url result handled by
        the ``Smotri`` extractor; the playlist title is the nickname parsed
        out of the channel description.
        """
        user_id = re.match(self._VALID_URL, url).group('userid')

        feed_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
        feed = self._download_xml(feed_url, user_id, u'Downloading user RSS')

        entries = []
        for link_node in feed.findall('./channel/item/link'):
            entries.append(self.url_result(link_node.text, 'Smotri'))

        channel_description = feed.find('./channel/description').text
        nickname = self._html_search_regex(
            u'^Видео режиссера (.*)$', channel_description,
            u'user nickname')

        return self.playlist_result(entries, user_id, nickname)
|
@@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|
||||
|(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
@@ -76,44 +76,78 @@ class SoundcloudIE(InfoExtractor):
|
||||
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
||||
track_id = compat_str(info['id'])
|
||||
name = full_title or track_id
|
||||
if quiet == False:
|
||||
if quiet:
|
||||
self.report_extraction(name)
|
||||
|
||||
thumbnail = info['artwork_url']
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
ext = info.get('original_format', u'mp3')
|
||||
result = {
|
||||
'id': track_id,
|
||||
'id': track_id,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': unified_strdate(info['created_at']),
|
||||
'title': info['title'],
|
||||
'ext': info.get('original_format', u'mp3'),
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
if info.get('downloadable', False):
|
||||
# We can build a direct link to the song
|
||||
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
|
||||
format_url = (
|
||||
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
||||
track_id, self._CLIENT_ID))
|
||||
result['formats'] = [{
|
||||
'format_id': 'download',
|
||||
'ext': ext,
|
||||
'url': format_url,
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
else:
|
||||
# We have to retrieve the url
|
||||
stream_json = self._download_webpage(
|
||||
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
||||
track_id, u'Downloading track url')
|
||||
# There should be only one entry in the dictionary
|
||||
key, stream_url = list(json.loads(stream_json).items())[0]
|
||||
if key.startswith(u'http'):
|
||||
result['url'] = stream_url
|
||||
elif key.startswith(u'rtmp'):
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = stream_url.split('mp3:', 1)
|
||||
result.update({
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
})
|
||||
else:
|
||||
|
||||
formats = []
|
||||
format_dict = json.loads(stream_json)
|
||||
for key, stream_url in format_dict.items():
|
||||
if key.startswith(u'http'):
|
||||
formats.append({
|
||||
'format_id': key,
|
||||
'ext': ext,
|
||||
'url': stream_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
elif key.startswith(u'rtmp'):
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = stream_url.split('mp3:', 1)
|
||||
formats.append({
|
||||
'format_id': key,
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
# We fallback to the stream_url in the original info, this
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
formats.append({
|
||||
'format_id': u'fallback',
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
def format_pref(f):
|
||||
if f['format_id'].startswith('http'):
|
||||
return 2
|
||||
if f['format_id'].startswith('rtmp'):
|
||||
return 1
|
||||
return 0
|
||||
|
||||
formats.sort(key=format_pref)
|
||||
result['formats'] = formats
|
||||
|
||||
return result
|
||||
|
||||
@@ -183,7 +217,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudUserIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
|
||||
IE_NAME = u'soundcloud:user'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
|
@@ -1,15 +1,14 @@
|
||||
import re
|
||||
|
||||
from .mtv import MTVIE, _media_xml_tag
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SouthParkStudiosIE(MTVIE):
|
||||
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = u'southparkstudios.com'
|
||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
# Overwrite MTVIE properties we don't want
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
||||
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||
thumb_node = itemdoc.find(search_path)
|
||||
if thumb_node is None:
|
||||
return None
|
||||
else:
|
||||
return thumb_node.attrib['url']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
url = u'http://www.' + mobj.group(u'url')
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import RegexNotFoundError, ExtractorError
|
||||
|
||||
|
||||
class SpaceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
|
||||
_VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
|
||||
_TEST = {
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
|
||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||
|
||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||
xml_code = self._download_webpage(
|
||||
idoc = self._download_xml(
|
||||
xml_url, video_id,
|
||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||
|
||||
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format_id': n.tag.rpartition('type')[2],
|
||||
|
@@ -1,13 +1,8 @@
|
||||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
@@ -18,7 +13,7 @@ from ..utils import (
|
||||
class StanfordOpenClassroomIE(InfoExtractor):
|
||||
IE_NAME = u'stanfordoc'
|
||||
IE_DESC = u'Stanford Open ClassRoom'
|
||||
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
||||
_VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
||||
_TEST = {
|
||||
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
|
||||
u'file': u'PracticalUnix_intro-environment.mp4',
|
||||
@@ -45,11 +40,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
self.report_extraction(info['id'])
|
||||
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
|
||||
xmlUrl = baseUrl + video + '.xml'
|
||||
try:
|
||||
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
mdoc = self._download_xml(xmlUrl, info['id'])
|
||||
try:
|
||||
info['title'] = mdoc.findall('./title')[0].text
|
||||
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
|
||||
@@ -95,12 +86,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
||||
'upload_date': None,
|
||||
}
|
||||
|
||||
self.report_download_webpage(info['id'])
|
||||
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
||||
try:
|
||||
rootpage = compat_urllib_request.urlopen(rootURL).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
|
||||
rootpage = self._download_webpage(rootURL, info['id'],
|
||||
errnote=u'Unable to download course info page')
|
||||
|
||||
info['title'] = info['id']
|
||||
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
self.report_extraction(video_id)
|
||||
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
|
||||
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
|
||||
|
||||
|
||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
|
||||
_TEST = {
|
||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
u'file': u'10635995.mp4',
|
||||
|
68
youtube_dl/extractor/theplatform.py
Normal file
68
youtube_dl/extractor/theplatform.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
# Shorthand used by the extractor below: forwards the path `p` to
# xpath_with_ns together with a mapping of the 'smil' prefix to the
# SMIL 2.1 Language namespace URI, so callers can write 'smil:head' etc.
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||
|
||||
|
||||
class ThePlatformIE(InfoExtractor):
    """Extractor for link.theplatform.com URLs and 'theplatform:<id>' pseudo-URLs."""

    _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'

    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        u'info_dict': {
            u'id': u'e9I_cZgTgIPd',
            u'ext': u'flv',
            u'title': u'Blackberry\'s big, bold Z30',
            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            u'duration': 247,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _get_info(self, video_id):
        """Build the info dictionary for `video_id`.

        Two documents are fetched: the SMIL manifest (parsed as XML), which
        lists the available video renditions, and the 'preview' document
        (parsed as JSON), which supplies title, description, thumbnail and
        duration.

        Returns a dict with the keys 'id', 'title', 'formats', 'description',
        'thumbnail' and 'duration'; 'formats' is ordered by ascending
        (height, width, vbr).
        """
        smil_doc = self._download_xml(
            ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
             'format=smil&mbr=true'.format(video_id)),
            video_id)
        preview = json.loads(self._download_webpage(
            'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id),
            video_id))

        # Every rendition shares the base URL declared in <head><meta base=...>;
        # the individual streams are told apart by their play path.
        shared_base = smil_doc.find(_x('smil:head')).find(_x('smil:meta')).attrib['base']
        video_nodes = smil_doc.find(_x('smil:body')).find(_x('smil:switch')).findall(_x('smil:video'))

        unsorted_formats = [
            {
                'url': shared_base,
                'play_path': 'mp4:' + node.attrib['src'],
                'ext': 'flv',
                'width': int(node.attrib['width']),
                'height': int(node.attrib['height']),
                'vbr': int(node.attrib['system-bitrate']),
            }
            for node in video_nodes
        ]
        ordered_formats = sorted(
            unsorted_formats,
            key=lambda fmt: (fmt['height'], fmt['width'], fmt['vbr']))

        return {
            'id': video_id,
            'title': preview['title'],
            'formats': ordered_formats,
            'description': preview['description'],
            'thumbnail': preview['defaultThumbnailUrl'],
            # The preview value is divided by 1000 (presumably ms -> s;
            # TODO confirm against the service's documentation).
            'duration': preview['duration'] // 1000,
        }

    def _real_extract(self, url):
        """Pull the video id out of `url` and delegate to _get_info."""
        video_id = re.match(self._VALID_URL, url).group('id')
        return self._get_info(video_id)
|
@@ -1,6 +1,5 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||
|
||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_webpage = self._download_webpage(
|
||||
streams_doc = self._download_xml(
|
||||
streams_url, video_id, note=u'Downloading stream list')
|
||||
|
||||
streams_doc = xml.etree.ElementTree.fromstring(
|
||||
streams_webpage.encode('utf-8'))
|
||||
video_url = next(n.text
|
||||
for n in streams_doc.findall('.//choice/url')
|
||||
if u'//ad.doubleclick' not in n.text)
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
|
||||
|
||||
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||
u'video-formats2' % log)
|
||||
format_str = self._download_webpage(
|
||||
format_doc = self._download_xml(
|
||||
format_url, video_id,
|
||||
note=u'Downloading formats',
|
||||
errnote=u'Error while downloading formats')
|
||||
|
||||
format_doc = xml.etree.ElementTree.fromstring(format_str)
|
||||
|
||||
video_url_template = (
|
||||
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||
@@ -58,7 +55,7 @@ class TriluliluIE(InfoExtractor):
|
||||
for fnode in format_doc.findall('./formats/format')
|
||||
]
|
||||
|
||||
info = {
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -67,7 +64,3 @@ class TriluliluIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
|
||||
return info
|
||||
|
@@ -3,7 +3,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class VeeHDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://veehd.com/video/4686958',
|
||||
|
@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody to Die For",
|
||||
u"duration": 230,
|
||||
u"duration": 230.12,
|
||||
u"width": 1920,
|
||||
u"height": 1080,
|
||||
}
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ViceIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
|
||||
_VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
|
||||
|
@@ -2,13 +2,10 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.viddler.com/v/43903784",
|
||||
u'file': u'43903784.mp4',
|
||||
@@ -47,7 +44,7 @@ class ViddlerIE(InfoExtractor):
|
||||
r"thumbnail\s*:\s*'([^']*)'",
|
||||
webpage, u'thumbnail', fatal=False)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -56,9 +53,3 @@ class ViddlerIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
|
||||
info.update(info['formats'][-1])
|
||||
|
||||
return info
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -8,7 +7,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class VideofyMeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
|
||||
_VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
|
||||
IE_NAME = u'videofy.me'
|
||||
|
||||
_TEST = {
|
||||
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||
video_id)
|
||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
video = config.find('video')
|
||||
sources = video.find('sources')
|
||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class VideoPremiumIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
|
||||
_TEST = {
|
||||
u'url': u'http://videopremium.tv/4w7oadjsf156',
|
||||
u'file': u'4w7oadjsf156.f4v',
|
||||
@@ -41,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
|
||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||
'ext': 'f4v',
|
||||
'title': video_title,
|
||||
}
|
||||
}
|
||||
|
@@ -1,6 +1,8 @@
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
)
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
@@ -20,7 +22,8 @@ class VikiIE(SubtitlesInfoExtractor):
|
||||
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||
u'upload_date': u'20131121',
|
||||
u'age_limit': 13,
|
||||
}
|
||||
},
|
||||
u'skip': u'Blocked in the US',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -32,11 +35,12 @@ class VikiIE(SubtitlesInfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
|
||||
u'uploader')
|
||||
if uploader is not None:
|
||||
uploader = uploader.strip()
|
||||
uploader_m = re.search(
|
||||
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
|
||||
if uploader_m is None:
|
||||
uploader = None
|
||||
else:
|
||||
uploader = uploader_m.group(1).strip()
|
||||
|
||||
rating_str = self._html_search_regex(
|
||||
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
||||
@@ -51,7 +55,12 @@ class VikiIE(SubtitlesInfoExtractor):
|
||||
age_limit = RATINGS.get(rating_str)
|
||||
|
||||
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
||||
info_webpage = self._download_webpage(info_url, video_id)
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note=u'Downloading info page')
|
||||
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
||||
raise ExtractorError(
|
||||
u'Video %s is blocked from your location.' % video_id,
|
||||
expected=True)
|
||||
video_url = self._html_search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
|
||||
|
||||
@@ -83,7 +92,8 @@ class VikiIE(SubtitlesInfoExtractor):
|
||||
|
||||
def _get_available_subtitles(self, video_id, info_webpage):
|
||||
res = {}
|
||||
for sturl in re.findall(r'<track src="([^"]+)"/>'):
|
||||
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
|
||||
sturl = unescapeHTML(sturl_html)
|
||||
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
|
||||
if not m:
|
||||
continue
|
||||
|
@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TESTS = [
|
||||
@@ -196,6 +196,16 @@ class VimeoIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
|
||||
try:
|
||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, u'view count'))
|
||||
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, u'like count'))
|
||||
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, u'comment count'))
|
||||
except RegexNotFoundError:
|
||||
# This info is only available in vimeo.com/{id} urls
|
||||
view_count = None
|
||||
like_count = None
|
||||
comment_count = None
|
||||
|
||||
# Vimeo specific: extract request signature and timestamp
|
||||
sig = config['request']['signature']
|
||||
timestamp = config['request']['timestamp']
|
||||
@@ -242,6 +252,9 @@ class VimeoIE(InfoExtractor):
|
||||
'description': video_description,
|
||||
'formats': formats,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
|
||||
@@ -249,25 +262,77 @@ class VimeoChannelIE(InfoExtractor):
|
||||
IE_NAME = u'vimeo:channel'
|
||||
# Channel URLs look like http://vimeo.com/channels/<id>.  The dot is escaped
# so it only matches a literal '.'; the previous spelling 'vimeo.\com' left
# the dot as a wildcard and used '\c', an unknown escape that newer versions
# of the re module reject with re.error.
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
def _page_url(self, base_url, pagenum):
|
||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._html_search_regex(self._TITLE_RE, webpage, u'list title')
|
||||
|
||||
def _extract_videos(self, list_id, base_url):
|
||||
video_ids = []
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
|
||||
channel_id, u'Downloading page %s' % pagenum)
|
||||
webpage = self._download_webpage(
|
||||
self._page_url(base_url, pagenum) ,list_id,
|
||||
u'Downloading page %s' % pagenum)
|
||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
for video_id in video_ids]
|
||||
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
|
||||
webpage, u'channel title')
|
||||
return {'_type': 'playlist',
|
||||
'id': channel_id,
|
||||
'title': channel_title,
|
||||
'id': list_id,
|
||||
'title': self._extract_list_title(webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
|
||||
|
||||
|
||||
class VimeoUserIE(VimeoChannelIE):
    """Playlist extractor for a Vimeo user page (http://vimeo.com/<name>)."""

    IE_NAME = u'vimeo:user'
    # The dot is escaped so it only matches a literal '.'; the previous
    # spelling 'vimeo.\com' left the dot as a wildcard and used '\c', an
    # unknown escape that newer versions of the re module reject.
    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        The user pattern is very broad (any first path component), so URLs
        already claimed by the channel, single-video, album or group
        extractors are rejected here.
        """
        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
            return False
        return super(VimeoUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Extract the user name from `url` and return the result of _extract_videos."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/%s' % name)
|
||||
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
    """Playlist extractor for Vimeo albums (http://vimeo.com/album/<id>)."""

    IE_NAME = u'vimeo:album'
    # The dot is escaped so it only matches a literal '.'; the previous
    # spelling 'vimeo.\com' left the dot as a wildcard and used '\c', an
    # unknown escape that newer versions of the re module reject.
    _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'

    def _page_url(self, base_url, pagenum):
        """Return the URL of page `pagenum`: '<base_url>/page:<pagenum>/'."""
        return '%s/page:%d/' % (base_url, pagenum)

    def _real_extract(self, url):
        """Extract the album id from `url` and return the result of _extract_videos."""
        mobj = re.match(self._VALID_URL, url)
        album_id = mobj.group('id')
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
||||
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
    """Playlist extractor for Vimeo groups (http://vimeo.com/groups/<name>)."""

    IE_NAME = u'vimeo:group'
    # The dot is escaped so it only matches a literal '.'; the previous
    # spelling 'vimeo.\com' left the dot as a wildcard and used '\c', an
    # unknown escape that newer versions of the re module reject.
    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'

    def _extract_list_title(self, webpage):
        """Return the list title via _og_search_title instead of a _TITLE_RE match."""
        return self._og_search_title(webpage)

    def _real_extract(self, url):
        """Extract the group name from `url` and return the result of _extract_videos."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
||||
_VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TEST = {
|
||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||
|
@@ -11,7 +11,8 @@ class WimpIE(InfoExtractor):
|
||||
u'file': u'deerfence.flv',
|
||||
u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
|
||||
u'info_dict': {
|
||||
u"title": u"Watch Till End: Herd of deer jump over a fence."
|
||||
u"title": u"Watch Till End: Herd of deer jump over a fence.",
|
||||
u"description": u"These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,18 +20,15 @@ class WimpIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')
|
||||
thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
|
||||
title = self._html_search_meta('description', webpage, u'video title')
|
||||
googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
|
||||
googleString = base64.b64decode(googleString).decode('ascii')
|
||||
final_url = self._search_regex('","(.*?)"', googleString,'final video url')
|
||||
ext = final_url.rpartition(u'.')[2]
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user