Compare commits

..

41 Commits

Author SHA1 Message Date
80b9bbce86 release 2013.11.13 2013-11-13 11:09:04 +01:00
d37936386f Credit @saper for tvp IE (#1730) 2013-11-13 11:08:07 +01:00
c3a3028f9f [tvp] Minor improvements (#1730) 2013-11-13 11:06:53 +01:00
6c5ad80cdc Merge remote-tracking branch 'saper/tvp' 2013-11-13 11:03:49 +01:00
b5bdc2699a Credit @jelly for gamekings extractor (#1759) 2013-11-13 10:52:22 +01:00
384b98cd8f [gamekings] Minor fixes (#1759) 2013-11-13 10:51:00 +01:00
eb9b5bffef Add extractor for gamekings.tv 2013-11-13 10:38:47 +01:00
8b8cbd8f6d [vine] Fix uploader extraction 2013-11-12 20:50:52 +01:00
72b18c5d34 FFmpegMetadataPP: don't enclose the values with " (fixes #1756) 2013-11-12 20:38:13 +01:00
eb0a839866 [common] Simplify og_search_property 2013-11-12 10:36:23 +01:00
1777d5a952 release 2013.11.11 2013-11-11 18:28:17 +01:00
d4b7da84c3 Clarify -c. Do not pass it in if you don't know what you're doing
Suggested in #1743
2013-11-11 14:21:14 +01:00
801dbbdffd Use avconv for downloading with m3u8 manifests if it's available (fixes #1735) 2013-11-10 16:47:03 +01:00
0ed05a1d2d Use the 'rtmp_live' field for the live parameter of rtmpdump 2013-11-10 12:45:17 +01:00
1008bebade Merge remote-tracking branch 'rzhxeo/rtmpdump_live' 2013-11-10 12:38:40 +01:00
ae84f879d7 Merge all the subtitles test into a single file
They reuse a base class
2013-11-10 12:28:21 +01:00
be6dfd1b49 [ted] Return a single info_dict for talks urls
It failed with the --list-subs option
2013-11-10 12:09:12 +01:00
231516b6c9 Merge pull request #1705 from iemejia/master
[ted] support for subtitles
2013-11-10 11:54:18 +01:00
fb53d58dcf Merge pull request #1726 from saper/escaped
Fix AssertionError when og property not found
2013-11-10 02:51:52 -08:00
2a9e9b210b Fix the documentation of '--autonumber-size' (#1743)
it's '--auto-number' not '--autonumber'
2013-11-09 19:21:30 +01:00
897d6cc43a Improve format listing for long format ids
Now arte.tv videos have quite long ids.
2013-11-09 19:07:34 +01:00
f470c6c812 [arte] Improve the format sorting
Also use the bitrate.
Prefer normal version and sourds/mal version over original version with subtitles.
2013-11-09 19:05:19 +01:00
566d4e0425 [arte] Make sure the format_id is unique (closes #1739)
Include the bitrate and use the height instead of the quality field.
2013-11-09 19:01:23 +01:00
81be02d2f9 [cnn] Accept www.cnn.com urls (fixes #1740) 2013-11-09 18:16:32 +01:00
c2b6a482d5 [brightcove] the format function requires to specify the index in python2.6 2013-11-09 18:10:11 +01:00
12c167c881 [soundcloud] Allow to download tracks marked as not 'streamable'
They use the rtmp protocol but if they are marked as 'downloadable' it can use the direct download link.
2013-11-09 18:08:03 +01:00
20aafee7fa [kankan] Fix the video url
It now requires two additional parameters, one is a timestamp we get from the getCdnresource_flv page and the other is a key we have to build.
2013-11-09 16:51:11 +01:00
be07375b66 Don't recode the video with m3u8 downloads (fixes #1741) 2013-11-09 16:40:00 +01:00
dd5bcdc4c9 [brightcove] Set the 'Referer' header if the url has the 'linkBaseUrl' parameter (fixes #1553) 2013-11-07 21:06:48 +01:00
6161d17579 release 2013.11.07 2013-11-07 11:06:34 +01:00
4ac5306ae7 Fix the report progress when file_size is unknown (#1731)
The report_progress function will accept eta and percent with None value and will set the message to 'Unknown ETA' or 'Unknown %'.
Otherwise the values must be numbers.
2013-11-07 08:03:35 +01:00
b1a80ec1a9 [xnxx] Accept urls that start with 'www' (fixes #1734) 2013-11-06 23:45:01 +01:00
5137ebac0b [tvp] Telewizja Polska: new extractor for tvp.pl, fixes #1719
Thanks-To: mplonski

https://github.com/mplonski/linux/blob/master/tvp-dl.py
2013-11-05 23:47:40 +01:00
a8eeb0597b Fix AssertionError when og property not found
On tvp.pl some webpages contain OpenGraph
metadata and some don't.

If og property is not found, _og_search_description
fails with

WARNING: unable to extract OpenGraph description; please report this issue on http://yt-dl.org/bug
Traceback (most recent call last):
  File "/usr/home/saper/bin/youtube-dl", line 18, in <module>
    youtube_dl.main()
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 766, in main
    _real_main(argv)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 719, in _real_main
    retcode = ydl.download(all_urls)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 715, in download
    videos = self.extract_info(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 348, in extract_info
    ie_result = ie.extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 125, in extract
    return self._real_extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/tvp.py", line 56, in _real_extract
    info['description'] = self._og_search_description(webpage)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 331, in _og_search_description
    return self._og_search_property('description', html, fatal=False, **kargs)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 325, in _og_search_property
    return unescapeHTML(escaped)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/utils.py", line 494, in unescapeHTML
    assert type(s) == type(u'')
AssertionError

The patch allows me to use:

  try:
    info['description'] = self._og_search_description(webpage)
    info['thumbnail'] = self._og_search_thumbnail(webpage)
  except RegexNotFoundError:
    pass
2013-11-05 23:19:29 +01:00
4ed3e51080 [ted] fixed error in case of no subtitles present
I created a test, but I leave it commented since TED videos get
new subtitles frequently.
2013-11-05 12:00:13 +01:00
38fcd4597a Merge remote-tracking branch 'iemejia/master' 2013-11-02 19:56:06 +01:00
38db46794f Merge branch 'ted_subtitles' 2013-11-02 19:50:45 +01:00
a9a3876d55 [ted] Added support for subtitle download 2013-11-02 19:48:39 +01:00
1f343eaabb [subtitles] refactor to support websites with subtitle information in the
webpage.

I added the parameter webpage, so now it's similar to the way automatic
captions are handled. This is an improvement needed for websites like
TED.
2013-11-02 19:29:25 +01:00
0a43ddf320 [CinemassacreIE] Add live parameter to extracted info as a workaround 2013-11-02 18:08:35 +01:00
31366066bd Add support for live parameter to rtmpdump 2013-11-02 18:08:16 +01:00
28 changed files with 490 additions and 306 deletions

View File

@ -92,12 +92,14 @@ which means you can modify it, redistribute it or use it however you like.
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
when it is present in output filename template or
--autonumber option is given
--auto-number option is given
--restrict-filenames Restrict filenames to only ASCII characters, and
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
-c, --continue force resume of partially downloaded files. By
default, youtube-dl will resume downloads if
possible.
--no-continue do not resume partially downloaded files (restart
from beginning)
--cookies FILE file to read cookies from and dump cookie jar in

View File

@ -1,70 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import DailymotionIE
class TestDailymotionSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.url = 'http://www.dailymotion.com/video/xczg00'
def getInfoDict(self):
IE = DailymotionIE(self.DL)
info_dict = IE.extract(self.url)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict['subtitles']
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -17,6 +17,7 @@ from youtube_dl.extractor import (
DailymotionUserIE,
VimeoChannelIE,
UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
@ -61,6 +62,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'5124905')
self.assertTrue(len(result['entries']) >= 11)
def test_soundcloud_set(self):
    """Extract a SoundCloud set and check that it comes back as a playlist."""
    extractor = SoundcloudSetIE(FakeYDL())
    playlist = extractor.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], u'The Royal Concept EP')
    # Only a lower bound is checked on the number of entries
    self.assertTrue(len(playlist['entries']) >= 6)
def test_soundcloud_user(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)

211
test/test_subtitles.py Normal file
View File

@ -0,0 +1,211 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import (
YoutubeIE,
DailymotionIE,
TEDIE,
)
class BaseTestSubtitles(unittest.TestCase):
    """Common fixture for the per-site subtitle test cases.

    Subclasses set ``url`` (what is passed to the extractor) and ``IE``
    (the extractor class to instantiate).
    """
    # Both are placeholders here; every concrete test case overrides them.
    url = None
    IE = None

    def setUp(self):
        # A fresh downloader and extractor for each test, so that changes to
        # self.DL.params made by one test do not carry over to the next.
        self.DL = FakeYDL()
        self.ie = self.IE(self.DL)

    def getInfoDict(self):
        """Return whatever the extractor produces for ``self.url``."""
        return self.ie.extract(self.url)

    def getSubtitles(self):
        """Return the 'subtitles' entry of the extraction result."""
        return self.getInfoDict()['subtitles']
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption extraction tests for YoutubeIE."""
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def getSubtitles(self):
        # Unlike the base class, the result is indexed with [0] before the
        # 'subtitles' entry is read.
        return self.getInfoDict()[0]['subtitles']

    def test_youtube_no_writesubtitles(self):
        """With 'writesubtitles' off, the subtitles entry is None."""
        self.DL.params['writesubtitles'] = False
        subs = self.getSubtitles()
        self.assertEqual(subs, None)

    def test_youtube_subtitles(self):
        """With no language selected, the 'en' track is checked by digest."""
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        """An explicitly requested language is returned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['it']), '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        """'allsubtitles' returns every track of the test video."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(len(subs.keys()), 13)

    def test_youtube_subtitles_sbv_format(self):
        """The 'sbv' subtitle format is checked by digest."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sbv'
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        """The 'vtt' subtitle format is checked by digest."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '356cdc577fde0c6783b9b822e7206ff7')

    def test_youtube_list_subtitles(self):
        """With 'listsubtitles' set, extraction yields None."""
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        result = self.getInfoDict()
        self.assertEqual(result, None)

    def test_youtube_automatic_captions(self):
        """Automatic captions are fetched for a different video id."""
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subs = self.getSubtitles()
        self.assertTrue(subs['it'] is not None)

    def test_youtube_nosubtitles(self):
        """A video without subtitles gives an empty result and a warning."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'sAjKT8FhjI8'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(len(subs), 0)

    def test_youtube_multiple_langs(self):
        """Every language in a multi-language request is present."""
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writesubtitles'] = True
        wanted = ['it', 'fr', 'de']
        self.DL.params['subtitleslangs'] = wanted
        subs = self.getSubtitles()
        for code in wanted:
            self.assertTrue(subs.get(code) is not None, u'Subtitles for \'%s\' not extracted' % code)
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for DailymotionIE."""
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_no_writesubtitles(self):
        """With no subtitle option set, the subtitles entry is None."""
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        """With no language selected, the 'en' track is checked by digest."""
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        """An explicitly requested language is returned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        """'allsubtitles' returns every track of the test video."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    def test_list_subtitles(self):
        """With 'listsubtitles' set, extraction yields None."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        """Requesting automatic captions warns and gives an empty result."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # Same 'subtitleslangs' key as the other tests in this module use;
        # the previous spelling 'subtitleslang' matched none of them.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual (rather than assertTrue(... == 0)) reports the actual
        # count when the check fails.
        self.assertEqual(len(subtitles.keys()), 0)

    def test_nosubtitles(self):
        """A video without subtitles gives an empty result and a warning."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        """Every language in a multi-language request is present."""
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction tests for TEDIE."""
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_no_writesubtitles(self):
        """With no subtitle option set, the subtitles entry is None."""
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        """With no language selected, the 'en' track is checked by digest."""
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')

    def test_subtitles_lang(self):
        """An explicitly requested language is returned."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')

    def test_allsubtitles(self):
        """'allsubtitles' returns every track of the test talk."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 28)

    def test_list_subtitles(self):
        """With 'listsubtitles' set, extraction yields None."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        """Requesting automatic captions warns and gives an empty result."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # Same 'subtitleslangs' key as the other tests in this module use;
        # the previous spelling 'subtitleslang' matched none of them.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual (rather than assertTrue(... == 0)) reports the actual
        # count when the check fails.
        self.assertEqual(len(subtitles.keys()), 0)

    def test_multiple_langs(self):
        """Every language in a multi-language request is present."""
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -1,95 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import YoutubeIE
class TestYoutubeSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.url = 'QRS8MkLhQmM'
def getInfoDict(self):
IE = YoutubeIE(self.DL)
info_dict = IE.extract(self.url)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'sbv'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -234,8 +234,14 @@ class FileDownloader(object):
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
eta_str = self.format_eta(eta)
percent_str = self.format_percent(percent)
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
@ -274,7 +280,7 @@ class FileDownloader(object):
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
@ -301,6 +307,8 @@ class FileDownloader(object):
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@ -373,15 +381,20 @@ class FileDownloader(object):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
# Check for ffmpeg first
try:
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
return False
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
retval = subprocess.call(args)
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
@ -418,7 +431,8 @@ class FileDownloader(object):
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None))
info_dict.get('tc_url', None),
info_dict.get('rtmp_live', False))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
@ -557,12 +571,11 @@ class FileDownloader(object):
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
self.report_progress('Unknown %', data_len_str, speed, 'Unknown ETA')
eta = None
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,

View File

@ -501,7 +501,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
options = ['-c', 'copy']
for (name, value) in metadata.items():
options.extend(['-metadata', '%s="%s"' % (name, value)])
options.extend(['-metadata', '%s=%s' % (name, value)])
options.extend(['-f', ext])
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)

View File

@ -782,7 +782,7 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def line(format):
return (u'%-15s%-10s%-12s%s' % (
return (u'%-20s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
self.format_resolution(format),

View File

@ -32,6 +32,8 @@ __authors__ = (
'Ismael Mejía',
'Steffan \'Ruirize\' James',
'Andras Elso',
'Jelle van der Waa',
'Marcin Cieślak',
)
__license__ = 'Public Domain'
@ -349,7 +351,7 @@ def parseOpts(overrideArguments=None):
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
filesystem.add_option('--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@ -358,7 +360,7 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
filesystem.add_option('--no-continue',
action='store_false', dest='continue_dl',
help='do not resume partially downloaded files (restart from beginning)')

View File

@ -57,6 +57,7 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE
from .generic import GenericIE
@ -134,6 +135,7 @@ from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .tvp import TvpIE
from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE

View File

@ -10,6 +10,7 @@ from ..utils import (
unified_strdate,
determine_ext,
get_element_by_id,
compat_str,
)
# There are different sources of video in arte.tv, the extraction process
@ -181,20 +182,30 @@ class ArteTVPlus7IE(InfoExtractor):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
# We order the formats by quality
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
def sort_key(f):
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
sort_key = lambda f: int(f.get('height',-1))
def sort_key(f):
return (
# Sort first by quality
int(f.get('height',-1)),
int(f.get('bitrate',-1)),
# The original version with subtitles has lower relevance
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
)
formats = sorted(formats, key=sort_key)
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
# Pick the best quality
def _format(format_info):
quality = format_info['quality']
m_quality = re.match(r'\w*? - (\d*)p', quality)
if m_quality is not None:
quality = m_quality.group(1)
quality = ''
height = format_info.get('height')
if height is not None:
quality = compat_str(height)
bitrate = format_info.get('bitrate')
if bitrate is not None:
quality += '-%d' % bitrate
if format_info.get('versionCode') is not None:
format_id = u'%s-%s' % (quality, format_info['versionCode'])
else:
@ -203,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
'format_id': format_id,
'format_note': format_info.get('versionLibelle'),
'width': format_info.get('width'),
'height': format_info.get('height'),
'height': height,
}
if format_info['mediaType'] == u'rtmp':
info['url'] = format_info['streamer']

View File

@ -10,10 +10,12 @@ from ..utils import (
find_xpath_attr,
compat_urlparse,
compat_str,
compat_urllib_request,
ExtractorError,
)
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@ -80,6 +82,9 @@ class BrightcoveIE(InfoExtractor):
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value']
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
@ -107,14 +112,18 @@ class BrightcoveIE(InfoExtractor):
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
return self._get_video_info(videoPlayer[0], query_str)
return self._get_video_info(videoPlayer[0], query_str, query)
else:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query):
request_url = self._FEDERATED_URL_TEMPLATE % query
webpage = self._download_webpage(request_url, video_id)
def _get_video_info(self, video_id, query_str, query):
request_url = self._FEDERATED_URL_TEMPLATE % query_str
req = compat_urllib_request.Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
req.add_header('Referer', linkBase[0])
webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')

View File

@ -65,6 +65,7 @@ class CinemassacreIE(InfoExtractor):
{
'url': url,
'play_path': 'mp4:' + sd_file,
'rtmp_live': True, # workaround
'ext': 'flv',
'format': 'sd',
'format_id': 'sd',
@ -72,6 +73,7 @@ class CinemassacreIE(InfoExtractor):
{
'url': url,
'play_path': 'mp4:' + hd_file,
'rtmp_live': True, # workaround
'ext': 'flv',
'format': 'hd',
'format_id': 'hd',

View File

@ -6,7 +6,7 @@ from ..utils import determine_ext
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{

View File

@ -322,6 +322,8 @@ class InfoExtractor(object):
if name is None:
name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None:
return None
return unescapeHTML(escaped)
def _og_search_thumbnail(self, html, **kargs):

View File

@ -141,9 +141,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError(u'Unable to extract video URL')
# subtitles
video_subtitles = self.extract_subtitles(video_id)
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
self._list_available_subtitles(video_id, webpage)
return
return {
@ -157,7 +157,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'age_limit': age_limit,
}
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,

View File

@ -33,5 +33,5 @@ class EitbIE(InfoExtractor):
raise ExtractorError(u'Could not extract the Brightcove url')
# The BrightcoveExperience object doesn't contain the video id, we set
# it manually
bc_url += '&%40videoPlayer={}'.format(chapter_id)
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
return self.url_result(bc_url, BrightcoveIE.ie_key())

View File

@ -0,0 +1,40 @@
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class GamekingsIE(InfoExtractor):
    """Information extractor for video pages on www.gamekings.tv."""

    # 'https?' makes the trailing "s" optional. The previous 'http?' made the
    # "p" optional instead, so it matched "htt://" and never "https://".
    _VALID_URL = r'https?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
    _TEST = {
        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
        u'file': u'20130811.mp4',
        u'md5': u'17f6088f7d0149ff2b46f2714bdb1954',
        u'info_dict': {
            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded, the video URL is read from its OpenGraph
        metadata, and the first run of digits in that URL is used as the
        video id. Title and description also come from OpenGraph metadata.
        """
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        video_url = self._og_search_video_url(webpage)

        # NOTE(review): assumes the OpenGraph video URL always contains at
        # least one digit; otherwise re.search returns None and .group fails.
        video = re.search(r'[0-9]+', video_url)
        video_id = video.group(0)

        # Todo: add medium format
        # Point the URL at the 'large/' variant by prefixing the id with it.
        video_url = video_url.replace(video_id, 'large/' + video_id)

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }

View File

@ -55,15 +55,17 @@ class GenericIE(InfoExtractor):
u'skip': u'There is a limit of 200 free downloads / month for the test song',
},
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
u'add_ie': ['Brightcove'],
u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
u'info_dict': {
u'id': u'2365799484001',
u'id': u'2765128793001',
u'ext': u'mp4',
u'title': u'Bubble Simulation',
u'description': u'A visualization from a new computer model of foam behavior.',
u'uploader': u'Scientific American',
u'title': u'Le cours de bourse : lanalyse technique',
u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
u'uploader': u'BFM BUSINESS',
},
u'params': {
u'skip_download': True,

View File

@ -1,8 +1,10 @@
import re
import hashlib
from .common import InfoExtractor
from ..utils import determine_ext
def _md5(s):
    """Return the hexadecimal MD5 digest of the text ``s`` encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
video_id, u'Downloading video url info')
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
video_url = 'http://%s%s' % (ip, path)
param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
key = _md5('xl_mp43651' + param1 + param2)
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
return {'id': video_id,
'title': title,

View File

@ -29,17 +29,34 @@ class SoundcloudIE(InfoExtractor):
)
'''
IE_NAME = u'soundcloud'
_TEST = {
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
u'file': u'62986583.mp3',
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
u'info_dict': {
u"upload_date": u"20121011",
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
u"uploader": u"E.T. ExTerrestrial Music",
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
}
_TESTS = [
{
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
u'file': u'62986583.mp3',
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
u'info_dict': {
u"upload_date": u"20121011",
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
u"uploader": u"E.T. ExTerrestrial Music",
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
},
# not streamable song
{
u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
u'info_dict': {
u'id': u'47127627',
u'ext': u'mp3',
u'title': u'Goldrushed',
u'uploader': u'The Royal Concept',
u'upload_date': u'20120521',
},
u'params': {
# rtmp
u'skip_download': True,
},
},
]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@ -56,16 +73,16 @@ class SoundcloudIE(InfoExtractor):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False):
video_id = info['id']
name = full_title or video_id
track_id = compat_str(info['id'])
name = full_title or track_id
if quiet == False:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
return {
'id': info['id'],
result = {
'id': track_id,
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
@ -74,6 +91,21 @@ class SoundcloudIE(InfoExtractor):
'description': info['description'],
'thumbnail': thumbnail,
}
if info.get('downloadable', False):
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
if not info.get('streamable', False):
# We have to get the rtmp url
stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
track_id, u'Downloading track url')
rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = rtmp_url.split('mp3:', 1)
result.update({
'url': url,
'play_path': 'mp3:' + path,
})
return result
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
@ -106,70 +138,8 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud:set'
_TEST = {
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
u"playlist": [
{
u"file":"30510138.mp3",
u"md5":"f9136bf103901728f29e419d2c70f55d",
u"info_dict": {
u"upload_date": u"20111213",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"D-D-Dance"
}
},
{
u"file":"47127625.mp3",
u"md5":"09b6758a018470570f8fd423c9453dd8",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"The Royal Concept - Gimme Twice"
}
},
{
u"file":"47127627.mp3",
u"md5":"154abd4e418cea19c3b901f1e1306d9c",
u"info_dict": {
u"upload_date": u"20120521",
u"uploader": u"The Royal Concept",
u"title": u"Goldrushed"
}
},
{
u"file":"47127629.mp3",
u"md5":"2f5471edc79ad3f33a683153e96a79c1",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"In the End"
}
},
{
u"file":"47127631.mp3",
u"md5":"f9ba87aa940af7213f98949254f1c6e2",
u"info_dict": {
u"upload_date": u"20120521",
u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
u"uploader": u"The Royal Concept",
u"title": u"Knocked Up"
}
},
{
u"file":"75206121.mp3",
u"md5":"f9d1fe9406717e302980c30de4af9353",
u"info_dict": {
u"upload_date": u"20130116",
u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
u"uploader": u"The Royal Concept",
u"title": u"World On Fire"
}
}
]
}
# it's in tests/test_playlists.py
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -208,7 +178,7 @@ class SoundcloudUserIE(SoundcloudIE):
IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py
_TEST = None
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return any([self._downloader.params.get('writesubtitles', False),
self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage=None):
def _list_available_subtitles(self, video_id, webpage):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id)
sub_lang_list = self._get_available_subtitles(video_id, webpage)
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen(u'%s: Available subtitles for video: %s' %
@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
self.to_screen(u'%s: Available automatic captions for video: %s' %
(video_id, auto_lang))
def extract_subtitles(self, video_id, video_webpage=None):
def extract_subtitles(self, video_id, webpage):
"""
returns {sub_lang: sub} ,{} if subtitles not found or None if the
subtitles aren't requested.
@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
return None
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id))
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
return
return sub
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
"""
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses

View File

@ -1,10 +1,14 @@
import json
import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_str,
RegexNotFoundError,
)
class TEDIE(InfoExtractor):
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL=r'''http://www\.ted\.com/
(
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
@ -32,7 +36,7 @@ class TEDIE(InfoExtractor):
def _real_extract(self, url):
m=re.match(self._VALID_URL, url, re.VERBOSE)
if m.group('type_talk'):
return [self._talk_info(url)]
return self._talk_info(url)
else :
playlist_id=m.group('playlist_id')
name=m.group('name')
@ -82,11 +86,21 @@ class TEDIE(InfoExtractor):
'url': stream['file'],
'format': stream['id']
} for stream in info['htmlStreams']]
video_id = info['id']
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
info = {
'id': info['id'],
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': desc,
'subtitles': video_subtitles,
'formats': formats,
}
@ -94,3 +108,17 @@ class TEDIE(InfoExtractor):
info.update(info['formats'][-1])
return info
def _get_available_subtitles(self, video_id, webpage):
    """Return {sub_lang: url} for the talk, or {} if none are offered.

    The languages are read from the ``subtitles_language_select`` element
    of ``webpage``; each one is mapped to the srt subtitle URL built from
    ``video_id`` and the language code.  A warning is reported when the
    element is missing from the page.
    """
    # Only the regex lookup can raise RegexNotFoundError, so keep the try
    # body limited to it.
    try:
        options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
    except RegexNotFoundError:
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}

    languages = re.findall(r'(?:<option value=")(\S+)"', options)
    # dict(generator) instead of a dict comprehension to stay compatible
    # with older interpreters; an empty language list yields {}.
    return dict(
        (lang, 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, lang))
        for lang in languages)

View File

@ -0,0 +1,41 @@
import json
import re
from .common import InfoExtractor
class TvpIE(InfoExtractor):
    """Information extractor for video pages on www.tvp.pl."""

    IE_NAME = u'tvp.pl'
    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
        u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
        u'file': u'12878238.wmv',
        u'info_dict': {
            u'title': u'31.10.2013',
            u'description': u'31.10.2013',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the tvp.pl page at ``url``.

        The media URL is taken from the ``video_url`` field of the JSON
        returned by the videofileinfo endpoint; title, description and
        thumbnail are read from the page's OpenGraph metadata.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Second request: JSON metadata describing the video file.
        metadata = json.loads(self._download_webpage(
            'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
            video_id, u"Downloading video metadata"))

        self.report_extraction(video_id)

        result = {'id': video_id, 'ext': 'wmv'}
        result['url'] = metadata['video_url']
        # The title is mandatory: extraction fails if it is missing.
        result['title'] = self._og_search_title(webpage, fatal=True)
        result['description'] = self._og_search_description(webpage)
        result['thumbnail'] = self._og_search_thumbnail(webpage)
        return result

View File

@ -27,7 +27,7 @@ class VineIE(InfoExtractor):
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
webpage, u'video URL')
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
webpage, u'uploader', fatal=False, flags=re.DOTALL)
return [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class XNXXIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'

View File

@ -1082,7 +1082,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _get_available_subtitles(self, video_id):
def _get_available_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,

View File

@ -1,2 +1,2 @@
__version__ = '2013.11.06.1'
__version__ = '2013.11.13'