Compare commits

..

120 Commits

Author SHA1 Message Date
00b350d209 [test] tell Travis to install rtmpdump and add initial support to rtmp testing 2013-11-25 17:46:33 -05:00
d8ec4959c8 Merge pull request #1830 from jaimeMF/download-archive
Use the 'extractor_key' field for the download archive file
2013-11-25 14:14:25 -08:00
d31209a144 Use the 'extractor_key' field for the download archive file
It has the same value as the ie_key.
2013-11-25 22:57:15 +01:00
529a2e2cc3 Fix typo in the documentation of the 'download_archive' param 2013-11-25 22:52:09 +01:00
781a7d0546 release 2013.11.25.3 2013-11-25 22:36:18 +01:00
fb04e40396 [soundcloud] Support for listing of audio-only files 2013-11-25 22:34:56 +01:00
d9b011f201 Fix rtmpdump with non-ASCII filenames on Windows on 2.x
Reported in #1798
2013-11-25 22:31:38 +01:00
b0b9eaa196 Merge pull request #1829 from jaimeMF/ydl-empty-params
Allow to initialize a YoutubeDL object without parameters
2013-11-25 13:19:59 -08:00
8b134b1062 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-25 22:16:07 +01:00
0c75c3fa7a Do not warn about fixed output template if --max-downloads is 1
Fixes #1828
2013-11-25 22:15:33 +01:00
a3927cf7ee Allow to initialize a YoutubeDL object without parameters
Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.
2013-11-25 22:03:39 +01:00
1a62c18f65 [bambuser] Skip the download in the test
It doesn't respect the 'Range' header.
2013-11-25 22:03:20 +01:00
2a15e7063b [soundcloud] Prefer HTTP over RTMP (#1798) 2013-11-25 20:30:41 +01:00
d46cc192d7 Reduce socket timeout 2013-11-25 19:11:01 +01:00
bb2bebdbe1 release 2013.11.25.2 2013-11-25 15:47:14 +01:00
5db07df634 Fix --download-archive (Fixes #1826) 2013-11-25 15:46:54 +01:00
ea36cbac5e Merge remote-tracking branch 'rbrito/swap-dimensions' 2013-11-25 06:19:15 +01:00
d0d2b49ab7 [FileDownloader] use moved format_bytes method 2013-11-25 06:17:41 +01:00
31cb6d8fef Merge remote-tracking branch 'rzhxeo/rtmpdump' 2013-11-25 06:16:18 +01:00
daa0dd2973 release 2013.11.25.1 2013-11-25 06:06:39 +01:00
de79c46c8f [viki] Fix subtitle extraction 2013-11-25 06:06:18 +01:00
94ccb6fa2e [viki] Fix subtitles extraction 2013-11-25 05:58:04 +01:00
07e4035879 [viki] Fix uploader extraction 2013-11-25 05:57:55 +01:00
d0efb9ec9a [tests] Remove global_setup function 2013-11-25 03:47:32 +01:00
ac05067d3d release 2013.11.25 2013-11-25 03:37:49 +01:00
113577e155 [generic] Improve detection
Allow download of http://goo.gl/7X5tOk
Fixes #1818
2013-11-25 03:35:53 +01:00
79d09f47c2 Merge branch 'opener-to-ydl' 2013-11-25 03:30:37 +01:00
c059bdd432 Remove quality_name field and improve zdf extractor 2013-11-25 03:28:55 +01:00
02dbf93f0e [zdf/common] Use API in ZDF extractor.
This also comes with a lot of extra format fields
Fixes #1518
2013-11-25 03:13:22 +01:00
1fb2bcbbf7 [viki] Make uploader field optional (#1813) 2013-11-25 02:02:34 +01:00
16e055849e Update the keywords tests for the rename of the old ComedyCentralIE 2013-11-24 22:13:20 +01:00
66cfab4226 [comedycentral] Add support for comedycentral.com videos (closes #1824)
It's a subclass of MTVIE

The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE
2013-11-24 21:18:35 +01:00
6d88bc37a3 [viki] Skip travis test
Also provide a better error message for geoblocked videos.
2013-11-24 15:28:50 +01:00
b7553b2554 [vik] Clarify output 2013-11-24 15:20:16 +01:00
e03db0a077 Merge branch 'master' into opener-to-ydl 2013-11-24 15:18:44 +01:00
a1ee09e815 Document proxy 2013-11-24 15:03:25 +01:00
267ed0c5d3 [collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
2013-11-24 14:59:19 +01:00
f459d17018 [youtube] Add an extractor for downloading the watch history (closes #1821) 2013-11-24 14:33:50 +01:00
dc65dcbb6d [mixcloud] The description field may be missing (fixes #1819) 2013-11-24 11:28:44 +01:00
d214fdb8fe [brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead 2013-11-24 11:02:34 +01:00
138df537ff release 2013.11.24.1 2013-11-24 07:51:56 +01:00
0c7c19d6bc [clipfish] Add extractor (Fixes #1760) 2013-11-24 07:51:44 +01:00
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access it's log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
50123be421 release 2013.11.22.1 2013-11-22 20:23:55 +01:00
3f8ced5144 Merge remote-tracking branch 'jaimeMF/yt-playlists' 2013-11-22 20:11:54 +01:00
00ea0f11eb Print full title in --get-title output (#1806) 2013-11-22 20:00:35 +01:00
dca0872056 Move the opener to the YoutubeDL object.
This is the first step towards being able to just import youtube_dl and start using it.
Apart from removing global state, this would fix problems like #1805.
2013-11-22 19:57:52 +01:00
0b63aed8df [update] do not assign to unused variables 2013-11-22 19:15:36 +01:00
15c3adbb16 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 19:08:33 +01:00
f143a42fe6 [bandcamp] Skip album test 2013-11-22 19:08:25 +01:00
241650c7ff [vimeo] Fix the extraction of vimeo pro and player.vimeo.com videos 2013-11-22 18:20:31 +01:00
bfe7439a20 release 2013.11.22 2013-11-22 17:46:26 +01:00
cffa6aa107 [bandcamp] Support trackinfo-style songs (Fixes #1270) 2013-11-22 17:44:55 +01:00
02e4ebbbad [streamcloud] Add IE (Fixes #1801) 2013-11-22 17:19:22 +01:00
ab009f59ef [toutv] Fix a typo 2013-11-22 17:18:03 +01:00
0980426559 [bandcamp] add support for albums (reported in #1270) 2013-11-22 16:05:14 +01:00
b1c9c66936 Remove unnecessary slash in setup.py (Fixes #1778) 2013-11-21 23:26:28 +01:00
a6a173c2fd utils.shell_quote: Convert the args to unicode strings
The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.
2013-11-21 14:09:28 +01:00
2bb683c201 release 2013.11.21 2013-11-21 13:59:33 +01:00
64bb5187f5 [soundcloud] Retrieve the file url using the client_id for the iPhone (fixes #1798)
The desktop's client_id always give the rtmp url, but with the iPhone one it returns the http url if it's available.
2013-11-21 13:16:19 +01:00
9e4f50a8ae [sztv] skip test, site is undergoing mid-term maintenance 2013-11-20 09:59:03 +01:00
0190eecc00 [nhl] Make NHLVideocenter IE_DESC fit with other descriptions 2013-11-20 09:45:29 +01:00
ca872a4c0b [spankwire] Fix description search 2013-11-20 09:23:53 +01:00
f2e87ef4fa [anitube] Skip test (on travis) 2013-11-20 07:46:44 +01:00
0ad97bbc05 [spankwire] fix check for description 2013-11-20 07:45:32 +01:00
c4864091a1 [videopremium] Support new crazy redirect scheme 2013-11-20 07:43:21 +01:00
9a98a466b3 [toutv] really skip test 2013-11-20 07:37:22 +01:00
f99e0f1ed6 Adapt age restriction tests to new .info.json filenames 2013-11-20 07:37:07 +01:00
d323bcb152 release 2013.11.20 2013-11-20 07:25:17 +01:00
da6a795fdb [escapist] Fix title search 2013-11-20 07:23:23 +01:00
c5edcde21f [escapist] upper-case URL 2013-11-20 06:56:59 +01:00
15ff3c831e [escapist] Fix syntax error 2013-11-20 06:55:07 +01:00
100959a6d9 [escapist] Add support for HD format (Closes #1755) 2013-11-20 06:52:08 +01:00
0a120f74b2 Credit @diffycat for anitube 2013-11-20 06:36:00 +01:00
8f05351984 [anitube] Minor fixes (#1776) 2013-11-20 06:35:02 +01:00
4eb92208a3 Adapt test to changed .info.json name 2013-11-20 06:34:48 +01:00
71791f414c Merge remote-tracking branch 'diffycat/master' 2013-11-20 06:28:13 +01:00
f3682997d7 Clean up unused imports and other minor mistakes 2013-11-20 06:27:48 +01:00
cc13cc0251 [teamcoco] Correct error 2013-11-20 06:25:33 +01:00
86bd5f2ca9 Merge remote-tracking branch 'dz0ny/patch-1' 2013-11-20 06:21:05 +01:00
8694c60000 import json for --dump-json 2013-11-20 06:18:24 +01:00
9d1538182f Add an option to dump json information 2013-11-20 06:14:57 +01:00
5904088811 Add support for tou.tv (Fixes #1792) 2013-11-20 06:13:19 +01:00
69545c2aff [d8] inherit from CanalplusIE
it reuses the same extraction process
2013-11-19 20:44:20 +01:00
495da337ae Merge pull request #1758 from migbac/master
Add support for d8.tv
2013-11-19 20:43:14 +01:00
34b3afc7be release 2013.11.19 2013-11-19 12:41:01 +01:00
00373a4c5d Merge pull request #1790 from rg3/console-title
Correctly write and restore the console title on the stack (fixes #1782)
2013-11-18 07:50:10 -08:00
cb7dfeeac4 [youtube] only allow domain name to be upper-case (#1786) 2013-11-18 16:42:35 +01:00
efd6c574a2 Correctly write and restore the console title on the stack (fixes #1782) 2013-11-18 16:35:41 +01:00
4113e6ab56 [auengine] Do not return unnecessary ext 2013-11-18 14:36:01 +01:00
2b35c9ef74 Merge branch 'master' into rtmpdump
Conflicts:
	youtube_dl/FileDownloader.py

Merge
2013-11-18 00:27:06 +01:00
ba3881dffd Add support for anitube.se (#1417) 2013-11-16 18:26:34 +04:00
08bc37cdd0 Update test_write_info_json.py 2013-11-13 18:55:49 +01:00
9771cceb2c Fix filename extension leaking to json filename
Makes writeinfojson behaving exactly as writethumbnail in case where filename contains mediafile extension.

Case:

video.mp4 converted to music.mp3 would yield music.mp4.info.json instead music.mp3.info.json or music.info.json
2013-11-13 18:34:03 +01:00
880e1c529d [youtube:playlist] Login into youtube if requested (fixes #1757)
Allows to download private playlists
2013-11-13 16:39:11 +01:00
dcbb45803f [youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
2013-11-13 16:26:50 +01:00
0bd59f3723 Add support for d8.tv 2013-11-12 23:32:03 +01:00
4894fe8c5b Report download progress of rtmpdump 2013-11-09 11:14:40 +01:00
d5a9bb4ea9 extractor: youtube: Swap video dimensions to match standard practice.
While working on this, I thought about simplifying things like changing
480x854 to 480p, and that seemed like a good option, until I realized that
people (me included) usually link the concept of some number followed by a p
with the video being 16:9.

So, we would be losing some information and, as we all know,
[explicit is better than implicit][*].

[*]: http://www.python.org/dev/peps/pep-0020/

This closes #1446.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-19 14:04:44 -03:00
62 changed files with 1338 additions and 435 deletions

View File

@ -3,6 +3,9 @@ python:
- "2.6"
- "2.7"
- "3.3"
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq rtmpdump
script: nosetests test --verbose
notifications:
email:

View File

@ -123,6 +123,7 @@ which means you can modify it, redistribute it or use it however you like.
--get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar

View File

@ -48,7 +48,7 @@ else:
'data_files': [ # Installing system-wide would require sudo...
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1'])
('share/man/man1', ['youtube-dl.1'])
]
}
if setuptools_available:

View File

@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding
def global_setup():
youtube_dl._setup_opener(timeout=10)
def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"parameters.json")

View File

@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup, try_rm
global_setup()
from test.helper import try_rm
from youtube_dl import YoutubeDL
@ -24,7 +23,7 @@ def _download_restricted(url, filename, age):
}
ydl = YoutubeDL(params)
ydl.add_default_info_extractors()
json_filename = filename + '.info.json'
json_filename = os.path.splitext(filename)[0] + '.info.json'
try_rm(json_filename)
ydl.download([url])
res = os.path.exists(json_filename)

View File

@ -100,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral'])
self.assertMatch(':tds', ['ComedyCentral'])
self.assertMatch(':colbertreport', ['ComedyCentral'])
self.assertMatch(':cr', ['ComedyCentral'])
self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':tds', ['ComedyCentralShows'])
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
if __name__ == '__main__':

View File

@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
get_params,
get_testcases,
global_setup,
try_rm,
md5,
report_warning
)
global_setup()
import hashlib
@ -103,7 +101,7 @@ def generator(test_case):
tc_filename = get_tc_filename(tc)
try_rm(tc_filename)
try_rm(tc_filename + '.part')
try_rm(tc_filename + '.info.json')
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files()
try:
try_num = 1
@ -130,11 +128,12 @@ def generator(test_case):
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc_filename in finished_hook_called)
self.assertTrue(os.path.exists(tc_filename + '.info.json'))
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(os.path.exists(info_json_fn))
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):

View File

@ -8,8 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (
@ -22,6 +21,7 @@ from youtube_dl.extractor import (
LivestreamIE,
NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE
)
@ -101,7 +101,15 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 66)
self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
if __name__ == '__main__':
unittest.main()

View File

@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from test.helper import FakeYDL, md5
from youtube_dl.extractor import (

View File

@ -24,6 +24,8 @@ from youtube_dl.utils import (
xpath_with_ns,
smuggle_url,
unsmuggle_url,
shell_quote,
encodeFilename,
)
if sys.version_info < (3, 0):
@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url)
self.assertEqual(res_data, None)
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
if __name__ == '__main__':
unittest.main()

View File

@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup, try_rm
global_setup()
from test.helper import get_params, try_rm
import io

View File

@ -7,8 +7,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup
global_setup()
from test.helper import get_params
import io
@ -31,7 +30,7 @@ params = get_params({
TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐

View File

@ -6,8 +6,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup
global_setup()
from test.helper import FakeYDL
from youtube_dl.extractor import (
@ -27,7 +26,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@ -44,13 +43,13 @@ class TestYoutubeLists(unittest.TestCase):
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('PLBB231211A4F62143')[0]
result = ie.extract('PLBB231211A4F62143')
self.assertTrue(len(result['entries']) > 25)
def test_youtube_playlist_long(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 799)
@ -58,7 +57,7 @@ class TestYoutubeLists(unittest.TestCase):
#651
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results)
@ -66,7 +65,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist_empty(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
self.assertIsPlaylist(result)
self.assertEqual(len(result['entries']), 0)
@ -74,7 +73,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25)
@ -84,22 +83,22 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
self.assertEqual(len(result['entries']), 2)
def test_youtube_show(self):

View File

@ -6,9 +6,6 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup
global_setup()
import io
import re

View File

@ -1,4 +1,3 @@
import math
import os
import re
import subprocess
@ -11,6 +10,7 @@ from .utils import (
ContentTooShortError,
determine_ext,
encodeFilename,
format_bytes,
sanitize_open,
timeconvert,
)
@ -53,20 +53,6 @@ class FileDownloader(object):
self._progress_hooks = []
self.params = params
@staticmethod
def format_bytes(bytes):
if bytes is None:
return 'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
@ -117,7 +103,7 @@ class FileDownloader(object):
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
@ -270,6 +256,61 @@ class FileDownloader(object):
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
@ -280,12 +321,11 @@ class FileDownloader(object):
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
@ -299,30 +339,48 @@ class FileDownloader(object):
if live:
basic_args += ['--live']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
retval = subprocess.call(args)
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
@ -525,7 +583,7 @@ class FileDownloader(object):
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()

View File

@ -5,9 +5,12 @@ from __future__ import absolute_import
import errno
import io
import json
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
@ -17,6 +20,7 @@ if os.name == 'nt':
import ctypes
from .utils import (
compat_cookiejar,
compat_http_client,
compat_print,
compat_str,
@ -29,9 +33,12 @@ from .utils import (
DownloadError,
encodeFilename,
ExtractorError,
format_bytes,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PostProcessingError,
platform_name,
preferredencoding,
SameFileError,
sanitize_filename,
@ -40,9 +47,11 @@ from .utils import (
UnavailableVideoError,
write_json_file,
write_string,
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
from .version import __version__
class YoutubeDL(object):
@ -84,6 +93,7 @@ class YoutubeDL(object):
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
forcefilename: Force printing final filename.
forcejson: Force printing info_dict as JSON.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
@ -95,6 +105,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
@ -115,9 +126,12 @@ class YoutubeDL(object):
noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped.
downloadarchive: File name of a file where all downloads are recorded.
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@ -132,7 +146,7 @@ class YoutubeDL(object):
_num_downloads = None
_screen_file = None
def __init__(self, params):
def __init__(self, params={}):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._ies_instances = {}
@ -155,9 +169,11 @@ class YoutubeDL(object):
self.params = params
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params['outtmpl']:
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self._setup_opener()
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@ -190,7 +206,9 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False):
if self.params.get('logger'):
self.params['logger'].debug(message)
elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
@ -198,10 +216,13 @@ class YoutubeDL(object):
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@ -217,13 +238,15 @@ class YoutubeDL(object):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
write_string(u'\033[22t', self._screen_file)
# Save the title on stack
write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
write_string(u'\033[23t', self._screen_file)
# Restore the title from stack
write_string(u'\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
@ -231,10 +254,9 @@ class YoutubeDL(object):
def __exit__(self, *args):
self.restore_console_title()
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
if self.params.get('cookiefile') is not None:
self.cookiejar.save()
def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
@ -351,15 +373,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
if matchtitle:
if not re.search(matchtitle, title, re.IGNORECASE):
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
@ -370,8 +394,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive'
% info_dict)
return (u'%s has already been recorded in archive'
% info_dict.get('title', info_dict.get('id', u'video')))
return None
@staticmethod
@ -450,7 +474,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@ -480,6 +504,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@ -635,7 +665,7 @@ class YoutubeDL(object):
# Forced printings
if self.params.get('forcetitle', False):
compat_print(info_dict['title'])
compat_print(info_dict['fulltitle'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
@ -649,6 +679,8 @@ class YoutubeDL(object):
compat_print(filename)
if self.params.get('forceformat', False):
compat_print(info_dict['format'])
if self.params.get('forcejson', False):
compat_print(json.dumps(info_dict))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@ -711,7 +743,7 @@ class YoutubeDL(object):
return
if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json'
infofn = os.path.splitext(filename)[0] + u'.info.json'
self.report_writeinfojson(infofn)
try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
@ -762,13 +794,15 @@ class YoutubeDL(object):
def download(self, url_list):
"""Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template():
if (len(url_list) > 1 and
'%' not in self.params['outtmpl']
and self.params.get('max_downloads') != 1):
raise SameFileError(self.params['outtmpl'])
for url in url_list:
try:
#It also downloads the videos
videos = self.extract_info(url)
self.extract_info(url)
except UnavailableVideoError:
self.report_error(u'unable to download video')
except MaxDownloadsReached:
@ -800,11 +834,26 @@ class YoutubeDL(object):
except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
if fn is None:
return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
if vid_id is None:
return False # Incomplete video information
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
@ -819,12 +868,15 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n')
@staticmethod
def format_resolution(format, default='unknown'):
if format.get('vcodec') == 'none':
return 'audio only'
if format.get('_resolution') is not None:
return format['_resolution']
if format.get('height') is not None:
@ -838,10 +890,11 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def format_note(fdict):
if fdict.get('format_note') is not None:
return fdict['format_note']
res = u''
if fdict.get('vcodec') is not None:
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
@ -857,25 +910,100 @@ class YoutubeDL(object):
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
if fdict.get('filesize') is not None:
if res:
res += u', '
res += format_bytes(fdict['filesize'])
return res
def line(format):
return (u'%-20s%-10s%-12s%s' % (
def line(format, idlen=20):
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
format_note(format),
)
)
))
formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats))
idlen = max(len(u'format code'),
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'})
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
""" Start an HTTP download """
return self._opener.open(req)
def print_debug_header(self):
if not self.params.get('verbose'):
return
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self, timeout=20):
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy')
if opts_cookiefile is None:
self.cookiejar = compat_cookiejar.CookieJar()
else:
self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):
self.cookiejar.load()
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
self.cookiejar)
if opts_proxy is not None:
if opts_proxy == '':
proxies = {}
else:
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(
self.params.get('nocheckcertificate', False))
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
# TODO remove this global modification
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(timeout)

View File

@ -34,50 +34,42 @@ __authors__ = (
'Andras Elso',
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
)
__license__ = 'Public Domain'
import codecs
import collections
import getpass
import optparse
import os
import random
import re
import shlex
import socket
import subprocess
import sys
import traceback
import platform
from .utils import (
compat_cookiejar,
compat_print,
compat_str,
compat_urllib_request,
DateRange,
decodeOption,
determine_ext,
DownloadError,
get_cachedir,
make_HTTPS_handler,
MaxDownloadsReached,
platform_name,
preferredencoding,
SameFileError,
std_headers,
write_string,
YoutubeDLHandler,
)
from .update import update_self
from .version import __version__
from .FileDownloader import (
FileDownloader,
)
from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL
from .PostProcessor import (
FFmpegMetadataPP,
@ -214,7 +206,9 @@ def parseOpts(overrideArguments=None):
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
selection.add_option('--max-downloads', metavar='NUMBER',
dest='max_downloads', type=int, default=None,
help='Abort after downloading NUMBER files')
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
@ -306,6 +300,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-format',
action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False)
verbosity.add_option('-j', '--dump-json',
action='store_true', dest='dumpjson',
help='simulate, quiet but print JSON information', default=False)
verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress',
@ -447,19 +444,6 @@ def _real_main(argv=None):
parser, opts, args = parseOpts(argv)
# Open appropriate CookieJar
if opts.cookiefile is None:
jar = compat_cookiejar.CookieJar()
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
@ -491,8 +475,6 @@ def _real_main(argv=None):
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors()
if opts.list_extractors:
@ -547,7 +529,7 @@ def _real_main(argv=None):
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@ -558,13 +540,13 @@ def _real_main(argv=None):
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err:
except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@ -608,7 +590,7 @@ def _real_main(argv=None):
'username': opts.username,
'password': opts.password,
'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forceid': opts.getid,
@ -616,8 +598,9 @@ def _real_main(argv=None):
'forcedescription': opts.getdescription,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'format': opts.format,
'format_limit': opts.format_limit,
'listformats': opts.listformats,
@ -666,34 +649,12 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
}
with YoutubeDL(ydl_opts) as ydl:
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %
(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
@ -724,46 +685,9 @@ def _real_main(argv=None):
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
try:
jar.save()
except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode)
def _setup_opener(jar=None, opts=None, timeout=300):
if opts is None:
FakeOptions = collections.namedtuple(
'FakeOptions', ['proxy', 'no_check_certificate'])
opts = FakeOptions(proxy=None, no_check_certificate=False)
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
if opts.proxy is not None:
if opts.proxy == '':
proxies = {}
else:
proxies = {'http': opts.proxy, 'https': opts.proxy}
else:
proxies = compat_urllib_request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(opts)
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders = []
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(timeout)
return opener
def main(argv=None):
try:
_real_main(argv)

View File

@ -1,5 +1,6 @@
from .appletrailers import AppleTrailersIE
from .addanime import AddAnimeIE
from .anitube import AnitubeIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import (
@ -10,7 +11,7 @@ from .arte import (
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .breakcom import BreakIE
@ -19,12 +20,14 @@ from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
@ -96,6 +99,7 @@ from .nba import NBAIE
from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
@ -126,12 +130,14 @@ from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tf1 import TF1IE
from .thisav import ThisAVIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .tube8 import Tube8IE
@ -152,6 +158,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
from .websurg import WeBSurgIE
@ -179,6 +186,7 @@ from .youtube import (
YoutubeTruncatedURLIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
)
from .zdf import ZDFIE

View File

@ -0,0 +1,55 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
IE_NAME = u'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.anitube.se/video/36621',
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
u'file': u'36621.mp4',
u'info_dict': {
u'id': u'36621',
u'ext': u'mp4',
u'title': u'Recorder to Randoseru 01',
},
u'skip': u'Blocked in the US',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
webpage, u'key')
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
key)
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
video_title = config_xml.find('title').text
formats = []
video_url = config_xml.find('file')
if video_url is not None:
formats.append({
'format_id': 'sd',
'url': video_url.text,
})
video_url = config_xml.find('filehd')
if video_url is not None:
formats.append({
'format_id': 'hd',
'url': video_url.text,
})
return {
'id': video_id,
'title': video_title,
'formats': formats
}

View File

@ -44,7 +44,6 @@ class AUEngineIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail,
}

View File

@ -25,6 +25,11 @@ class BambuserIE(InfoExtractor):
u'uploader': u'pixelversity',
u'uploader_id': u'344706',
},
u'params': {
# It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
u'skip_download': True,
},
}
def _real_extract(self, url):

View File

@ -3,13 +3,16 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urlparse,
ExtractorError,
)
class BandcampIE(InfoExtractor):
IE_NAME = u'Bandcamp'
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TEST = {
_TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
},
u'skip': u'There is a limit of 200 free downloads / month for the test song'
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
# We get the link to the free download page
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
}
else:
raise ExtractorError(u'No free songs found')
download_link = m_download.group(1)
@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
}
return [track_info]
class BandcampAlbumIE(InfoExtractor):
IE_NAME = u'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = {
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
u'playlist': [
{
u'file': u'1353101989.mp3',
u'md5': u'39bc1eded3476e927c724321ddf116cf',
u'info_dict': {
u'title': u'Intro',
}
},
{
u'file': u'38097443.mp3',
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
u'info_dict': {
u'title': u'Kero One - Keep It Alive (Blazo remix)',
}
},
],
u'params': {
u'playlistend': 2
},
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError(u'The page doesn\'t contain any track')
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths]
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
return {
'_type': 'playlist',
'title': title,
'entries': entries,
}

View File

@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
}
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
def find_param(name):
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
playerKey = find_param('playerKey')
# Not all pages define this value
if playerKey is not None:
params['playerKey'] = playerKey.attrib['value']
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
params['playerKey'] = playerKey
# The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value']
params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data

View File

@ -5,6 +5,7 @@ import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.groupdict().get('id')
if video_id is None:
webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')

View File

@ -12,27 +12,21 @@ class CinemassacreIE(InfoExtractor):
_TESTS = [{
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
u'file': u'19911.flv',
u'md5': u'f9bb7ede54d1229c9846e197b4737e06',
u'info_dict': {
u'upload_date': u'20121110',
u'title': u'“Angry Video Game Nerd: The Movie” Trailer',
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
},
{
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
u'file': u'521be8ef82b16.flv',
u'md5': u'9509ee44dcaa7c1068604817c19a9e50',
u'info_dict': {
u'upload_date': u'20131002',
u'title': u'The Mummys Hand (1940)',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}
}]
def _real_extract(self, url):

View File

@ -0,0 +1,53 @@
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
class ClipfishIE(InfoExtractor):
IE_NAME = u'clipfish'
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
_TEST = {
u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
u'file': u'4028320.f4v',
u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
u'info_dict': {
u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
u'duration': 399,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
(video_id, int(time.time())))
info_xml = self._download_webpage(
info_url, video_id, note=u'Downloading info page')
doc = xml.etree.ElementTree.fromstring(info_xml)
title = doc.find('title').text
video_url = doc.find('filename').text
thumbnail = doc.find('imageurl').text
duration_str = doc.find('duration').text
m = re.match(
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
duration_str)
if m:
duration = (
(int(m.group('hours')) * 60 * 60) +
(int(m.group('minutes')) * 60) +
(int(m.group('seconds')))
)
else:
duration = None
return {
'id': video_id,
'title': title,
'url': video_url,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@ -1,5 +1,4 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
metaXml = self._download_webpage(xmlUrl, video_id,
mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML',
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor):
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
except IndexError:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')

View File

@ -2,6 +2,7 @@ import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag
from ..utils import (
compat_str,
compat_urllib_parse,
@ -11,7 +12,37 @@ from ..utils import (
)
class ComedyCentralIE(InfoExtractor):
class ComedyCentralIE(MTVIE):
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
u'md5': u'4167875aae411f903b751a21f357f1ee',
u'info_dict': {
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
u'ext': u'mp4',
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = u'The Daily Show / Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like:

View File

@ -4,11 +4,11 @@ import re
import socket
import sys
import netrc
import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
clean_html,
@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML,
)
class InfoExtractor(object):
"""Information Extractor class.
@ -75,6 +76,7 @@ class InfoExtractor(object):
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
@ -156,7 +158,7 @@ class InfoExtractor(object):
elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
@ -208,6 +210,11 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@ -229,12 +236,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
def url_result(self, url, ie=None):
def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
@ -350,6 +359,17 @@ class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _html_search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=False)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
def _rta_search(self, html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single
if re.search(r'(?ix)<meta\s+name="rating"\s+'
@ -358,6 +378,23 @@ class InfoExtractor(object):
return 18
return 0
def _media_rating_search(self, html):
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
rating = self._html_search_meta('rating', html)
if not rating:
return None
RATING_TABLE = {
'safe for kids': 0,
'general': 8,
'14 years': 14,
'mature': 17,
'restricted': 19,
}
return RATING_TABLE.get(rating.lower(), None)
class SearchInfoExtractor(InfoExtractor):
"""

View File

@ -0,0 +1,22 @@
# encoding: utf-8
from .canalplus import CanalplusIE
class D8IE(CanalplusIE):
_VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
IE_NAME = u'd8.tv'
_TEST = {
u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
u'file': u'966289.flv',
u'info_dict': {
u'title': u'Campagne intime - Documentaire exceptionnel',
u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
u'upload_date': u'20131108',
},
u'params': {
# rtmp
u'skip_download': True,
},
}

View File

@ -1,4 +1,3 @@
import itertools
import json
import random
import re

View File

@ -11,11 +11,11 @@ from ..utils import (
class EscapistIE(InfoExtractor):
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_TEST = {
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
u'info_dict': {
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
u"uploader": u"the-escapist-presents",
@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
showName = mobj.group('showname')
videoId = mobj.group('episode')
self.report_extraction(videoId)
webpage = self._download_webpage(url, videoId)
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, u'description', fatal=False)
playerUrl = self._og_search_video_url(webpage, name='player url')
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
webpage, u'player url').split(' : ')[-1]
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
webpage, u'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
configJSON = self._download_webpage(configUrl, videoId,
u'Downloading configuration',
u'unable to download configuration')
formats = []
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
def _add_format(name, cfgurl):
configJSON = self._download_webpage(
cfgurl, videoId,
u'Downloading ' + name + ' configuration',
u'Unable to download ' + name + ' configuration')
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
try:
config = json.loads(configJSON)
except (ValueError,) as err:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
playlist = config['playlist']
formats.append({
'url': playlist[1]['url'],
'format_id': name,
})
_add_format(u'normal', configUrl)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
try:
config = json.loads(configJSON)
except (ValueError,) as err:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
_add_format(u'hq', hq_url)
except ExtractorError:
pass # That's fine, we'll just use normal quality
playlist = config['playlist']
videoUrl = playlist[1]['url']
info = {
return {
'id': videoId,
'url': videoUrl,
'formats': formats,
'uploader': showName,
'upload_date': None,
'title': title,
'ext': 'mp4',
'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc,
'player_url': playerUrl,
}
return [info]

View File

@ -1,5 +1,4 @@
import json
import netrc
import re
import socket

View File

@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i)
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
video_title = 'Fernsehkritik %d.%d' % (episode, i)
videos.append({
'id': video_id,
'url': video_url,

View File

@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_id = video_id = mobj.group('page_id')
page_id = mobj.group('page_id')
webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
data_video = json.loads(unescapeHTML(data_video_json))

View File

@ -199,7 +199,8 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
burl = unescapeHTML(mobj.group(1))
return self.url_result(burl, 'Bandcamp')
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
@ -208,7 +209,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@ -235,18 +236,16 @@ class GenericIE(InfoExtractor):
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0]
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader')
return [{
return {
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
'ext': video_extension,
}]
}

View File

@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly"

View File

@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />',

View File

@ -6,9 +6,7 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
get_meta_content,
xpath_with_ns,
ExtractorError,
)

View File

@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
'title': info['name'],
'url': final_song_url,
'ext': 'mp3',
'description': info['description'],
'description': info.get('description'),
'thumbnail': info['pictures'].get('extra_large'),
'uploader': info['user']['name'],
'uploader_id': info['user']['username'],

View File

@ -59,7 +59,6 @@ class MTVIE(InfoExtractor):
if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
renditions = mdoc.findall('.//rendition')
formats = []
for rendition in mdoc.findall('.//rendition'):

View File

@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com:videocenter'
IE_DESC = u'Download the first 12 videos from a videocenter category'
IE_DESC = u'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
@classmethod

View File

@ -0,0 +1,131 @@
# encoding: utf-8
import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
)
class NiconicoIE(InfoExtractor):
IE_NAME = u'niconico'
IE_DESC = u'ニコニコ動画'
_TEST = {
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
u'file': u'sm22312215.mp4',
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
u'info_dict': {
u'title': u'Big Buck Bunny',
u'uploader': u'takuya0301',
u'uploader_id': u'2698420',
u'upload_date': u'20131123',
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
},
u'params': {
u'username': u'ydl.niconico@gmail.com',
u'password': u'youtube-dl',
},
}
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
_NETRC_MACHINE = 'niconico'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = True
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
# Log in
login_form_strs = {
u'mail': username,
u'password': password,
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
request = compat_urllib_request.Request(
u'https://secure.nicovideo.jp/secure/login', login_data)
login_results = self._download_webpage(
request, u'', note=u'Logging in', errnote=u'Unable to log in')
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return False
return True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
video_info_webpage = self._download_webpage(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
note=u'Downloading video info page')
# Get flv info
flv_info_webpage = self._download_webpage(
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
video_id, u'Downloading flv info')
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
video_title = video_info.find('.//title').text
video_extension = video_info.find('.//movie_type').text
video_format = video_extension.upper()
video_thumbnail = video_info.find('.//thumbnail_url').text
video_description = video_info.find('.//description').text
video_uploader_id = video_info.find('.//user_id').text
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
video_view_count = video_info.find('.//view_counter').text
video_webpage_url = video_info.find('.//watch_url').text
# uploader
video_uploader = video_uploader_id
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
try:
user_info_webpage = self._download_webpage(
url, video_id, note=u'Downloading user information')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
else:
user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
video_uploader = user_info.find('.//nickname').text
return {
'id': video_id,
'url': video_real_url,
'title': video_title,
'ext': video_extension,
'format': video_format,
'thumbnail': video_thumbnail,
'description': video_description,
'uploader': video_uploader,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'view_count': video_view_count,
'webpage_url': video_webpage_url,
}

View File

@ -6,7 +6,6 @@ from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
unescapeHTML,
)
from ..aes import (
aes_decrypt_text

View File

@ -59,6 +59,7 @@ class SoundcloudIE(InfoExtractor):
]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@classmethod
def suitable(cls, url):
@ -75,36 +76,79 @@ class SoundcloudIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, quiet=False):
track_id = compat_str(info['id'])
name = full_title or track_id
if quiet == False:
if quiet:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = info.get('original_format', u'mp3')
result = {
'id': track_id,
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'id': track_id,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
'title': info['title'],
'ext': info.get('original_format', u'mp3'),
'title': info['title'],
'description': info['description'],
'thumbnail': thumbnail,
}
if info.get('downloadable', False):
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
if not info.get('streamable', False):
# We have to get the rtmp url
# We can build a direct link to the song
format_url = (
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
'format_id': 'download',
'ext': ext,
'url': format_url,
'vcodec': 'none',
}]
else:
# We have to retrieve the url
stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
track_id, u'Downloading track url')
rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = rtmp_url.split('mp3:', 1)
result.update({
'url': url,
'play_path': 'mp3:' + path,
})
formats = []
format_dict = json.loads(stream_json)
for key, stream_url in format_dict.items():
if key.startswith(u'http'):
formats.append({
'format_id': key,
'ext': ext,
'url': stream_url,
'vcodec': 'none',
})
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
formats.append({
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': ext,
'vcodec': 'none',
})
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
formats.append({
'format_id': u'fallback',
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'ext': ext,
'vcodec': 'none',
})
def format_pref(f):
if f['format_id'].startswith('http'):
return 2
if f['format_id'].startswith('rtmp'):
return 1
return 0
formats.sort(key=format_pref)
result['formats'] = formats
return result
def _real_extract(self, url):
@ -158,7 +202,6 @@ class SoundcloudSetIE(SoundcloudIE):
resolv_url = self._resolv_url(url)
info_json = self._download_webpage(resolv_url, full_title)
videos = []
info = json.loads(info_json)
if 'errors' in info:
for err in info['errors']:

View File

@ -6,7 +6,6 @@ from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
unescapeHTML,
)
from ..aes import (
aes_decrypt_text
@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
if len(description) == 0:
description = None
video_uploader = self._html_search_regex(
r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:

View File

@ -2,7 +2,6 @@ import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
class SpiegelIE(InfoExtractor):

View File

@ -0,0 +1,66 @@
# coding: utf-8
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
)
class StreamcloudIE(InfoExtractor):
IE_NAME = u'streamcloud.eu'
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
_TEST = {
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
u'file': u'skp9j99s4bpz.mp4',
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
u'info_dict': {
u'title': u'youtube-dl test video \'/\\ ä ↭',
u'duration': 9,
},
u'skip': u'Only available from the EU'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
orig_webpage = self._download_webpage(url, video_id)
fields = re.findall(r'''(?x)<input\s+
type="(?:hidden|submit)"\s+
name="([^"]+)"\s+
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', orig_webpage)
post = compat_urllib_parse.urlencode(fields)
self.to_screen('%s: Waiting for timeout' % video_id)
time.sleep(12)
headers = {
b'Content-Type': b'application/x-www-form-urlencoded',
}
req = compat_urllib_request.Request(url, post, headers)
webpage = self._download_webpage(
req, video_id, note=u'Downloading video page ...')
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)<', webpage, u'title')
video_url = self._search_regex(
r'file:\s*"([^"]+)"', webpage, u'video URL')
duration_str = self._search_regex(
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
duration = None if duration_str is None else int(duration_str)
thumbnail = self._search_regex(
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'duration': duration,
'thumbnail': thumbnail,
}

View File

@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
u'info_dict': {
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
}
},
u'skip': u'Service temporarily disabled as of 2013-11-20'
}
def _real_extract(self, url):

View File

@ -60,7 +60,7 @@ class TeamcocoIE(InfoExtractor):
return -1
formats.sort(key=sort_key)
if not formats:
raise RegexNotFoundError(u'Unable to extract video URL')
raise ExtractorError(u'Unable to extract video URL')
return {
'id': video_id,

View File

@ -4,7 +4,6 @@ import re
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_str,
RegexNotFoundError,
)
@ -113,6 +112,6 @@ class TEDIE(SubtitlesInfoExtractor):
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
sub_lang_list[l] = url
return sub_lang_list
except RegexNotFoundError as err:
except RegexNotFoundError:
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}

View File

@ -0,0 +1,74 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class TouTvIE(InfoExtractor):
IE_NAME = u'tou.tv'
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
_TEST = {
u'url': u'http://www.tou.tv/30-vies/S04E41',
u'file': u'30-vies_S04E41.mp4',
u'info_dict': {
u'title': u'30 vies Saison 4 / Épisode 41',
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
u'age_limit': 8,
u'uploader': u'Groupe des Nouveaux Médias',
u'duration': 1296,
u'upload_date': u'20131118',
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
},
u'params': {
u'skip_download': True, # Requires rtmpdump
},
u'skip': 'Only available in Canada'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
mediaId = self._search_regex(
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_webpage = self._download_webpage(
streams_url, video_id, note=u'Downloading stream list')
streams_doc = xml.etree.ElementTree.fromstring(
streams_webpage.encode('utf-8'))
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)
if video_url.endswith('/Unavailable.flv'):
raise ExtractorError(
u'Access to this video is blocked from outside of Canada',
expected=True)
duration_str = self._html_search_meta(
'video:duration', webpage, u'duration')
duration = int(duration_str) if duration_str else None
upload_date_str = self._html_search_meta(
'video:release_date', webpage, u'upload date')
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
return {
'id': video_id,
'title': self._og_search_title(webpage),
'url': video_url,
'description': self._og_search_description(webpage),
'uploader': self._dc_search_uploader(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._media_rating_search(webpage),
'duration': duration,
'upload_date': upload_date,
'ext': 'mp4',
}

View File

@ -5,8 +5,6 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
unescapeHTML,
)
from ..aes import (
aes_decrypt_text

View File

@ -9,12 +9,10 @@ class VideoPremiumIE(InfoExtractor):
_TEST = {
u'url': u'http://videopremium.tv/4w7oadjsf156',
u'file': u'4w7oadjsf156.f4v',
u'md5': u'e51e4a266aab7531c6ac06f4ffee3b0d',
u'info_dict': {
u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
},
u'params': {
u'skip_download': True,
},
}
}
def _real_extract(self, url):
@ -24,12 +22,16 @@ class VideoPremiumIE(InfoExtractor):
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,
note=u'Downloading webpage again (with cookie)')
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
webpage, u'video title')
video_title = self._html_search_regex(
r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
return [{
return {
'id': video_id,
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
'play_path': "mp4:%s.f4v" % video_id,
@ -37,4 +39,4 @@ class VideoPremiumIE(InfoExtractor):
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v',
'title': video_title,
}]
}

View File

@ -0,0 +1,101 @@
import re
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
IE_NAME = u'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
u'file': u'1023585v.mp4',
u'md5': u'a21454021c2646f5433514177e2caa5f',
u'info_dict': {
u'title': u'Heirs Episode 14',
u'uploader': u'SBS',
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
u'upload_date': u'20131121',
u'age_limit': 13,
},
u'skip': u'Blocked in the US',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
uploader_m = re.search(
r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
if uploader_m is None:
uploader = None
else:
uploader = uploader_m.group(1).strip()
rating_str = self._html_search_regex(
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
u'rating information', default='').strip()
RATINGS = {
'G': 0,
'PG': 10,
'PG-13': 13,
'R': 16,
'NC': 18,
}
age_limit = RATINGS.get(rating_str)
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
info_webpage = self._download_webpage(
info_url, video_id, note=u'Downloading info page')
if re.match(r'\s*<div\s+class="video-error', info_webpage):
raise ExtractorError(
u'Video %s is blocked from your location.' % video_id,
expected=True)
video_url = self._html_search_regex(
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
upload_date_str = self._html_search_regex(
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
upload_date = (
unified_strdate(upload_date_str)
if upload_date_str is not None
else None
)
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, info_webpage)
return
return {
'id': video_id,
'title': title,
'url': video_url,
'description': description,
'thumbnail': thumbnail,
'age_limit': age_limit,
'uploader': uploader,
'subtitles': video_subtitles,
'upload_date': upload_date,
}
def _get_available_subtitles(self, video_id, info_webpage):
res = {}
for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
res[m.group('lang')] = sturl
return res

View File

@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor):
config = json.loads(config_json)
except RegexNotFoundError:
# For pro videos or player.vimeo.com urls
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
webpage, u'info section', flags=re.DOTALL)
config = json.loads(config)
except Exception as e:

View File

@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
)
class XTubeIE(InfoExtractor):

View File

@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com'
_VALID_URL = r"""(?xi)^
_VALID_URL = r"""(?x)^
(
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'248': 'webm',
}
_video_dimensions = {
'5': '240x400',
'5': '400x240',
'6': '???',
'13': '???',
'17': '144x176',
'18': '360x640',
'22': '720x1280',
'34': '360x640',
'35': '480x854',
'36': '240x320',
'37': '1080x1920',
'38': '3072x4096',
'43': '360x640',
'44': '480x854',
'45': '720x1280',
'46': '1080x1920',
'17': '176x144',
'18': '640x360',
'22': '1280x720',
'34': '640x360',
'35': '854x480',
'36': '320x240',
'37': '1920x1080',
'38': '4096x3072',
'43': '640x360',
'44': '854x480',
'45': '1280x720',
'46': '1920x1080',
'82': '360p',
'83': '480p',
'84': '720p',
@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
})
return results
class YoutubePlaylistIE(InfoExtractor):
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?:
(?:https?://)?
@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
_MAX_RESULTS = 50
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
IE_NAME = u'youtube:playlist'
@classmethod
@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _real_initialize(self):
self._login()
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -1548,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor):
video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
return self.url_result(video_id, 'Youtube', video_id=video_id)
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Download playlist videos from API
videos = []
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
start_index = self._MAX_RESULTS * (page_num - 1) + 1
if start_index >= 1000:
self._downloader.report_warning(u'Max number of results reached')
break
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
# The ids are duplicated
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
ids.extend(new_ids)
try:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
if 'feed' not in response:
raise ExtractorError(u'Got a malformed response from YouTube API')
playlist_title = response['feed']['title']['$t']
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
break
for entry in response['feed']['entry']:
index = entry['yt$position']['$t']
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
videos.append((
index,
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
))
playlist_title = self._og_search_title(page)
videos = [v[1] for v in sorted(videos)]
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
return [self.playlist_result(url_results, playlist_id, playlist_title)]
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
for vid_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeChannelIE(InfoExtractor):
@ -1640,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
return [self.playlist_result(url_entries, channel_id)]
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
@ -1706,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
return [self.playlist_result(url_results, playlist_title = username)]
url_results = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches'
@ -1749,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n:
video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
@ -1809,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
if info['paging'] is None:
break
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
@ -1834,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
_PAGING_STEP = 100
_PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
_VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
_PERSONAL_FEED = True
_PLAYLIST_TITLE = u'Youtube Watch History'
def _real_extract(self, url):
webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
# The step is actually a ridiculously big number (like 1374343569725646)
self._PAGING_STEP = int(data_paging)
return super(YoutubeHistoryIE, self)._real_extract(url)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

View File

@ -1,75 +1,111 @@
import operator
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
parse_xml_doc,
unified_strdate,
)
class ZDFIE(InfoExtractor):
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id')
if mobj.group('hash'):
url = url.replace(u'#', u'', 1)
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
info_xml = self._download_webpage(
xml_url, video_id, note=u'Downloading video info')
doc = parse_xml_doc(info_xml)
html = self._download_webpage(url, video_id)
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
if streams is None:
raise ExtractorError(u'No media url found.')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
uploader_node = doc.find('.//details/originChannelTitle')
uploader = None if uploader_node is None else uploader_node.text
duration_str = doc.find('.//details/length').text
duration_m = re.match(r'''(?x)^
(?P<hours>[0-9]{2})
:(?P<minutes>[0-9]{2})
:(?P<seconds>[0-9]{2})
(?:\.(?P<ms>[0-9]+)?)
''', duration_str)
duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m
else None
)
upload_date = unified_strdate(doc.find('.//details/airtime').text)
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
# choose first/default media type and highest quality for now
def stream_pref(s):
TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
def xml_to_format(fnode):
video_url = fnode.find('url').text
is_available = u'http://www.metafilegenerator' not in video_url
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = format_m.group('container')
is_supported = ext != 'f4f'
PROTO_ORDER = ['http', 'rtmp', 'rtsp']
try:
type_pref = TYPE_ORDER.index(s['media_type'])
proto_pref = -PROTO_ORDER.index(format_m.group('proto'))
except ValueError:
type_pref = 999
proto_pref = 999
QUALITY_ORDER = ['veryhigh', '300']
quality = fnode.find('./quality').text
QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
try:
quality_pref = QUALITY_ORDER.index(s['quality'])
quality_pref = -QUALITY_ORDER.index(quality)
except ValueError:
quality_pref = 999
return (type_pref, quality_pref)
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr = int(fnode.find('./videoBitrate').text) // 1000
pref = (is_available, is_supported,
proto_pref, quality_pref, vbr, abr)
sorted_streams = sorted(streams, key=stream_pref)
if not sorted_streams:
raise ExtractorError(u'No stream found.')
stream = sorted_streams[0]
format_note = u''
if not is_supported:
format_note += u'(unsupported)'
if not format_note:
format_note = None
media_link = self._download_webpage(
stream['video_url'],
video_id,
u'Get stream URL')
return {
'format_id': format_id + u'-' + quality,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': int(fnode.find('./width').text),
'height': int(fnode.find('./height').text),
'filesize': int(fnode.find('./filesize').text),
'format_note': format_note,
'_pref': pref,
'_available': is_available,
}
MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
mobj = re.search(self._MEDIA_STREAM, media_link)
if mobj is None:
mobj = re.search(RTSP_STREAM, media_link)
if mobj is None:
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
video_url = mobj.group('video_url')
title = self._html_search_regex(
r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
html, u'title')
format_nodes = doc.findall('.//formitaeten/formitaet')
formats = sorted(filter(lambda f: f['_available'],
map(xml_to_format, format_nodes)),
key=operator.itemgetter('_pref'))
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': determine_ext(video_url)
'formats': formats,
'description': description,
'uploader': uploader,
'duration': duration,
'upload_date': upload_date,
}

View File

@ -41,6 +41,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False
return True
def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository"""
@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
return
version_id = versions_info['latest']
def version_tuple(version_str):
return tuple(map(int, version_str.split('.')))
if version_tuple(__version__) >= version_tuple(version_id):
to_screen(u'youtube-dl is up to date (%s)' % __version__)
return
to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id]
@ -109,7 +117,7 @@ def update_self(to_screen, verbose):
urlh = compat_urllib_request.urlopen(version['exe'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@ -122,7 +130,7 @@ def update_self(to_screen, verbose):
try:
with open(exe + '.new', 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to write the new version')
return
@ -141,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return
@ -152,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh = compat_urllib_request.urlopen(version['bin'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version')
return
@ -165,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
try:
with open(filename, 'wb') as outf:
outf.write(newcontent)
except (IOError, OSError) as err:
except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
return

View File

@ -8,13 +8,16 @@ import gzip
import io
import json
import locale
import math
import os
import pipes
import platform
import re
import ssl
import socket
import sys
import traceback
import xml.etree.ElementTree
import zlib
try:
@ -535,17 +538,34 @@ def formatSeconds(secs):
else:
return '%d' % secs
def make_HTTPS_handler(opts):
if sys.version_info < (3,2):
# Python's 2.x handler is very simplistic
return compat_urllib_request.HTTPSHandler()
def make_HTTPS_handler(opts_no_check_certificate):
if sys.version_info < (3, 2):
import httplib
class HTTPSConnectionV3(httplib.HTTPSConnection):
def __init__(self, *args, **kwargs):
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
def connect(self):
sock = socket.create_connection((self.host, self.port), self.timeout)
if self._tunnel_host:
self.sock = sock
self._tunnel()
try:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
except ssl.SSLError:
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
def https_open(self, req):
return self.do_open(HTTPSConnectionV3, req)
return HTTPSHandlerV3()
else:
import ssl
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
context.set_default_verify_paths()
context.verify_mode = (ssl.CERT_NONE
if opts.no_check_certificate
if opts_no_check_certificate
else ssl.CERT_REQUIRED)
return compat_urllib_request.HTTPSHandler(context=context)
@ -734,6 +754,8 @@ def unified_strdate(date_str):
'%Y/%m/%d %H:%M:%S',
'%d.%m.%Y %H:%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
]
for expression in format_expressions:
@ -949,7 +971,16 @@ class locked_file(object):
def shell_quote(args):
return ' '.join(map(pipes.quote, args))
quoted_args = []
encoding = sys.getfilesystemencoding()
if encoding is None:
encoding = 'utf-8'
for a in args:
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
quoted_args.append(pipes.quote(a))
return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq):
@ -976,3 +1007,22 @@ def unsmuggle_url(smug_url):
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
def parse_xml_doc(s):
assert isinstance(s, type(u''))
return xml.etree.ElementTree.fromstring(s.encode('utf-8'))
def format_bytes(bytes):
if bytes is None:
return u'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return u'%.2f%s' % (converted, suffix)

View File

@ -1,2 +1,2 @@
__version__ = '2013.11.18.1'
__version__ = '2013.11.25.3'