Compare commits
146 Commits
2013.07.24
...
2013.08.23
Author | SHA1 | Date | |
---|---|---|---|
|
8ae7be3ef4 | ||
|
306170518f | ||
|
aa6a10c44a | ||
|
9af73dc4fc | ||
|
fc483bb6af | ||
|
53b0f3e4e2 | ||
|
4353cf51a0 | ||
|
ce34e9ce5e | ||
|
d4051a8e05 | ||
|
df3df7fb64 | ||
|
9e9c164052 | ||
|
066090dd3f | ||
|
74e6672beb | ||
|
02bcf0d389 | ||
|
10204dc898 | ||
|
1865ed31b9 | ||
|
3669cdba10 | ||
|
939fbd26ac | ||
|
e6ddb4e7af | ||
|
83390b83d9 | ||
|
4a55479fa9 | ||
|
f527115b5f | ||
|
75e1b46add | ||
|
063fcc9676 | ||
|
8403612258 | ||
|
25b51c7816 | ||
|
9779b63bb6 | ||
|
d81aef3adf | ||
|
45ed795cb0 | ||
|
683e98a8a4 | ||
|
e0cfeb2ea7 | ||
|
75340ee383 | ||
|
668de34c6b | ||
|
a91b954bb4 | ||
|
37b6d5f684 | ||
|
b7a6838407 | ||
|
cde846b3d3 | ||
|
6c3e6e88d3 | ||
|
739674cd77 | ||
|
4b2d7cae11 | ||
|
7fea7156cb | ||
|
3093468977 | ||
|
79cb25776f | ||
|
87f78946a5 | ||
|
211fbc1328 | ||
|
836a086ce9 | ||
|
90d3989b99 | ||
|
d741e55a42 | ||
|
17d3aaaf16 | ||
|
ea55b2a4ca | ||
|
3f0537dd4a | ||
|
12e895fc5a | ||
|
bda2c49d75 | ||
|
01b32990da | ||
|
dbda1b5147 | ||
|
ddf3bd328b | ||
|
b9c37b92cf | ||
|
f9c3c90ca8 | ||
|
6daccbe317 | ||
|
71ea844c0e | ||
|
3a7256697e | ||
|
d1ba998274 | ||
|
718ced8d8c | ||
|
e1842025d0 | ||
|
2b9213cdc1 | ||
|
0577177e3e | ||
|
298f833b16 | ||
|
0f399e6e5e | ||
|
5b075e27cb | ||
|
8a9d86a2a7 | ||
|
d468a09789 | ||
|
9f4ab73d7f | ||
|
02cf62e240 | ||
|
67fb0c5495 | ||
|
4efba05c56 | ||
|
0f90943e45 | ||
|
526e638c8a | ||
|
356e067390 | ||
|
e2f48f9643 | ||
|
b513a251f8 | ||
|
36cb11f068 | ||
|
7a4c6cc92f | ||
|
7edcb8f39c | ||
|
39b782b390 | ||
|
577664c8e8 | ||
|
bba12cec89 | ||
|
70c4c03cb8 | ||
|
f5791ed136 | ||
|
fbf189a6ee | ||
|
09825cb5c0 | ||
|
ed27d35674 | ||
|
fd5539eb41 | ||
|
04bca64bde | ||
|
03cc7c20c1 | ||
|
4075311d94 | ||
|
95fdc7d69c | ||
|
86fe61c8f9 | ||
|
9bb6d2f21d | ||
|
e3f4593e76 | ||
|
1d043b93cf | ||
|
b15d4f624f | ||
|
4aa16a50f5 | ||
|
bbcbf4d459 | ||
|
930ad9eecc | ||
|
b072a9defd | ||
|
75952c6e3d | ||
|
05afc96b73 | ||
|
fa80026915 | ||
|
2bc3de0f28 | ||
|
99c7bc94af | ||
|
152c8f349d | ||
|
d75654c15e | ||
|
0725f584e1 | ||
|
8cda9241d1 | ||
|
a3124ba49f | ||
|
579e2691fe | ||
|
63f05de10b | ||
|
caeefc29eb | ||
|
a3c736def2 | ||
|
58261235f0 | ||
|
da70877a1b | ||
|
5c468ca8a8 | ||
|
aedd6bb97d | ||
|
733d9cacb8 | ||
|
42f2805e48 | ||
|
0ffcb7c6fc | ||
|
27669bd11d | ||
|
6625f82940 | ||
|
d0866f0bb4 | ||
|
09eeb75130 | ||
|
0a99956f71 | ||
|
12ef6aefa8 | ||
|
e93aa81aa6 | ||
|
755eb0320e | ||
|
43ba5456b1 | ||
|
156d5ad6da | ||
|
c626a3d9fa | ||
|
b2e8bc1b20 | ||
|
771822ebb8 | ||
|
eb6a41ba0f | ||
|
7d2392691c | ||
|
6804038d06 | ||
|
2f799533ae | ||
|
88ae5991cd | ||
|
5d51a883c2 | ||
|
c4a91be726 |
16
README.md
16
README.md
@@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats (currently youtube
|
||||
only)
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file (currently youtube only)
|
||||
--write-auto-sub write automatic subtitle file (currently youtube
|
||||
only)
|
||||
--only-sub [deprecated] alias of --skip-download
|
||||
--all-subs downloads all the available subtitles of the
|
||||
video (currently youtube only)
|
||||
video
|
||||
--list-subs lists all available subtitles for the video
|
||||
(currently youtube only)
|
||||
--sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
|
||||
(currently youtube only)
|
||||
--sub-lang LANG language of the subtitles to download (optional)
|
||||
use IETF language tags like 'en'
|
||||
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube
|
||||
only)
|
||||
--sub-lang LANGS languages of the subtitles to download (optional)
|
||||
separated by commas, use IETF language tags like
|
||||
'en,pt'
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME account username
|
||||
@@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-
|
||||
processed files are overwritten by default
|
||||
--embed-subs embed subtitles in the video (only for mp4
|
||||
videos)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
|
@@ -11,30 +11,45 @@ tests = [
|
||||
# 90
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||
# 89
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
|
||||
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
|
||||
# 88
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||
# 87 - vflART1Nf 2013/07/24
|
||||
# 87
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
|
||||
# 86 - vfl_ymO4Z 2013/06/27
|
||||
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
||||
# 86 - vflh9ybst 2013/08/23
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
# 85 - vflSAFCP9 2013/07/19
|
||||
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
|
||||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83 - vflcaqGO8 2013/07/11
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
|
||||
# 82
|
||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||
# 82 - vflZK4ZYR 2013/08/23
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
# 81
|
||||
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
|
||||
# 81 - vflLC8JvQ 2013/07/25
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
|
||||
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
# 80 - vflZK4ZYR 2013/08/23 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
|
||||
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
|
||||
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
|
||||
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
|
||||
]
|
||||
|
||||
tests_age_gate = [
|
||||
# 86 - vflqinMWD
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
]
|
||||
|
||||
def find_matching(wrong, right):
|
||||
@@ -87,6 +102,8 @@ def genall(tests):
|
||||
|
||||
def main():
|
||||
print(genall(tests))
|
||||
print(u' Age gate:')
|
||||
print(genall(tests_age_gate))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
|
38
test/test_playlists.py
Normal file
38
test/test_playlists.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import json
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
|
||||
from youtube_dl.utils import *
|
||||
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestPlaylists(unittest.TestCase):
|
||||
def assertIsPlaylist(self, info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
self.assertEqual(info['_type'], 'playlist')
|
||||
|
||||
def test_dailymotion_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = DailymotionPlaylistIE(dl)
|
||||
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'SPORT')
|
||||
self.assertTrue(len(result['entries']) > 20)
|
||||
|
||||
def test_vimeo_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = VimeoChannelIE(dl)
|
||||
result = ie.extract('http://vimeo.com/channels/tributes')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], u'Vimeo Tributes')
|
||||
self.assertTrue(len(result['entries']) > 24)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -1,67 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor.youtube import YoutubeIE
|
||||
from helper import FakeYDL
|
||||
|
||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
||||
|
||||
class TestYoutubeSig(unittest.TestCase):
|
||||
def test_92(self):
|
||||
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
|
||||
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_90(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
|
||||
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_88(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
|
||||
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_87(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
|
||||
right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_86(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
|
||||
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_85(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
|
||||
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_84(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
|
||||
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_83(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_82(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
|
||||
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_81(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
|
||||
DL.params['writesubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_subtitles_it(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
DL.params['subtitleslangs'] = ['it']
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
|
||||
sub = info_dict[0]['subtitles']['it']
|
||||
self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
|
||||
def test_youtube_onlysubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['onlysubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_allsubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['allsubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
subtitles = info_dict[0]['subtitles']
|
||||
self.assertEqual(len(subtitles), 13)
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'sbv'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'vtt'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
sub = info_dict[0]['subtitles']['en']
|
||||
self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
def test_youtube_list_subtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['listsubtitles'] = True
|
||||
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
|
||||
def test_youtube_automatic_captions(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writeautomaticsub'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
DL.params['subtitleslangs'] = ['it']
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('8YoUxe5ncPo')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertTrue(sub[2] is not None)
|
||||
sub = info_dict[0]['subtitles']['it']
|
||||
self.assertTrue(sub is not None)
|
||||
def test_youtube_multiple_langs(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
langs = ['it', 'fr', 'de']
|
||||
DL.params['subtitleslangs'] = langs
|
||||
IE = YoutubeIE(DL)
|
||||
subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -79,9 +79,13 @@ class FileDownloader(object):
|
||||
rate = float(current) / dif
|
||||
eta = int((float(total) - float(current)) / rate)
|
||||
(eta_mins, eta_secs) = divmod(eta, 60)
|
||||
if eta_mins > 99:
|
||||
return '--:--'
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
(eta_hours, eta_mins) = divmod(eta_mins, 60)
|
||||
if eta_hours > 99:
|
||||
return '--:--:--'
|
||||
if eta_hours == 0:
|
||||
return '%02d:%02d' % (eta_mins, eta_secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
@@ -329,6 +333,35 @@ class FileDownloader(object):
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_m3u8_with_ffmpeg(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
|
||||
# Check for ffmpeg first
|
||||
try:
|
||||
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
@@ -354,6 +387,10 @@ class FileDownloader(object):
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
# m3u8 manifest are downloaded with ffmpeg
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return self._download_m3u8_with_ffmpeg(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
|
@@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
||||
return dict((program, executable(program)) for program in programs)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
|
||||
if not self._exes['ffmpeg'] and not self._exes['avconv']:
|
||||
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
|
||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
|
||||
|
||||
files_cmd = []
|
||||
for path in input_paths:
|
||||
files_cmd.extend(['-i', encodeFilename(path)])
|
||||
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
|
||||
+ opts +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
|
||||
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout,stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
@@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
|
||||
self.run_ffmpeg_multiple_files([path], out_path, opts)
|
||||
|
||||
def _ffmpeg_filename_argument(self, fn):
|
||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||
if fn.startswith(u'-'):
|
||||
@@ -100,7 +108,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
self._nopostoverwrites = nopostoverwrites
|
||||
|
||||
def get_audio_codec(self, path):
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
|
||||
if not self._exes['ffprobe'] and not self._exes['avprobe']:
|
||||
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
|
||||
try:
|
||||
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||
@@ -208,7 +217,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
try:
|
||||
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
|
||||
except:
|
||||
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
||||
self._downloader.report_warning(u'Cannot update utime of audio file')
|
||||
|
||||
information['filepath'] = new_path
|
||||
return self._nopostoverwrites,information
|
||||
@@ -231,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
|
||||
information['format'] = self._preferedformat
|
||||
information['ext'] = self._preferedformat
|
||||
return False,information
|
||||
|
||||
|
||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
|
||||
_lang_map = {
|
||||
'aa': 'aar',
|
||||
'ab': 'abk',
|
||||
'ae': 'ave',
|
||||
'af': 'afr',
|
||||
'ak': 'aka',
|
||||
'am': 'amh',
|
||||
'an': 'arg',
|
||||
'ar': 'ara',
|
||||
'as': 'asm',
|
||||
'av': 'ava',
|
||||
'ay': 'aym',
|
||||
'az': 'aze',
|
||||
'ba': 'bak',
|
||||
'be': 'bel',
|
||||
'bg': 'bul',
|
||||
'bh': 'bih',
|
||||
'bi': 'bis',
|
||||
'bm': 'bam',
|
||||
'bn': 'ben',
|
||||
'bo': 'bod',
|
||||
'br': 'bre',
|
||||
'bs': 'bos',
|
||||
'ca': 'cat',
|
||||
'ce': 'che',
|
||||
'ch': 'cha',
|
||||
'co': 'cos',
|
||||
'cr': 'cre',
|
||||
'cs': 'ces',
|
||||
'cu': 'chu',
|
||||
'cv': 'chv',
|
||||
'cy': 'cym',
|
||||
'da': 'dan',
|
||||
'de': 'deu',
|
||||
'dv': 'div',
|
||||
'dz': 'dzo',
|
||||
'ee': 'ewe',
|
||||
'el': 'ell',
|
||||
'en': 'eng',
|
||||
'eo': 'epo',
|
||||
'es': 'spa',
|
||||
'et': 'est',
|
||||
'eu': 'eus',
|
||||
'fa': 'fas',
|
||||
'ff': 'ful',
|
||||
'fi': 'fin',
|
||||
'fj': 'fij',
|
||||
'fo': 'fao',
|
||||
'fr': 'fra',
|
||||
'fy': 'fry',
|
||||
'ga': 'gle',
|
||||
'gd': 'gla',
|
||||
'gl': 'glg',
|
||||
'gn': 'grn',
|
||||
'gu': 'guj',
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
'ht': 'hat',
|
||||
'hu': 'hun',
|
||||
'hy': 'hye',
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
'ik': 'ipk',
|
||||
'io': 'ido',
|
||||
'is': 'isl',
|
||||
'it': 'ita',
|
||||
'iu': 'iku',
|
||||
'ja': 'jpn',
|
||||
'jv': 'jav',
|
||||
'ka': 'kat',
|
||||
'kg': 'kon',
|
||||
'ki': 'kik',
|
||||
'kj': 'kua',
|
||||
'kk': 'kaz',
|
||||
'kl': 'kal',
|
||||
'km': 'khm',
|
||||
'kn': 'kan',
|
||||
'ko': 'kor',
|
||||
'kr': 'kau',
|
||||
'ks': 'kas',
|
||||
'ku': 'kur',
|
||||
'kv': 'kom',
|
||||
'kw': 'cor',
|
||||
'ky': 'kir',
|
||||
'la': 'lat',
|
||||
'lb': 'ltz',
|
||||
'lg': 'lug',
|
||||
'li': 'lim',
|
||||
'ln': 'lin',
|
||||
'lo': 'lao',
|
||||
'lt': 'lit',
|
||||
'lu': 'lub',
|
||||
'lv': 'lav',
|
||||
'mg': 'mlg',
|
||||
'mh': 'mah',
|
||||
'mi': 'mri',
|
||||
'mk': 'mkd',
|
||||
'ml': 'mal',
|
||||
'mn': 'mon',
|
||||
'mr': 'mar',
|
||||
'ms': 'msa',
|
||||
'mt': 'mlt',
|
||||
'my': 'mya',
|
||||
'na': 'nau',
|
||||
'nb': 'nob',
|
||||
'nd': 'nde',
|
||||
'ne': 'nep',
|
||||
'ng': 'ndo',
|
||||
'nl': 'nld',
|
||||
'nn': 'nno',
|
||||
'no': 'nor',
|
||||
'nr': 'nbl',
|
||||
'nv': 'nav',
|
||||
'ny': 'nya',
|
||||
'oc': 'oci',
|
||||
'oj': 'oji',
|
||||
'om': 'orm',
|
||||
'or': 'ori',
|
||||
'os': 'oss',
|
||||
'pa': 'pan',
|
||||
'pi': 'pli',
|
||||
'pl': 'pol',
|
||||
'ps': 'pus',
|
||||
'pt': 'por',
|
||||
'qu': 'que',
|
||||
'rm': 'roh',
|
||||
'rn': 'run',
|
||||
'ro': 'ron',
|
||||
'ru': 'rus',
|
||||
'rw': 'kin',
|
||||
'sa': 'san',
|
||||
'sc': 'srd',
|
||||
'sd': 'snd',
|
||||
'se': 'sme',
|
||||
'sg': 'sag',
|
||||
'si': 'sin',
|
||||
'sk': 'slk',
|
||||
'sl': 'slv',
|
||||
'sm': 'smo',
|
||||
'sn': 'sna',
|
||||
'so': 'som',
|
||||
'sq': 'sqi',
|
||||
'sr': 'srp',
|
||||
'ss': 'ssw',
|
||||
'st': 'sot',
|
||||
'su': 'sun',
|
||||
'sv': 'swe',
|
||||
'sw': 'swa',
|
||||
'ta': 'tam',
|
||||
'te': 'tel',
|
||||
'tg': 'tgk',
|
||||
'th': 'tha',
|
||||
'ti': 'tir',
|
||||
'tk': 'tuk',
|
||||
'tl': 'tgl',
|
||||
'tn': 'tsn',
|
||||
'to': 'ton',
|
||||
'tr': 'tur',
|
||||
'ts': 'tso',
|
||||
'tt': 'tat',
|
||||
'tw': 'twi',
|
||||
'ty': 'tah',
|
||||
'ug': 'uig',
|
||||
'uk': 'ukr',
|
||||
'ur': 'urd',
|
||||
'uz': 'uzb',
|
||||
've': 'ven',
|
||||
'vi': 'vie',
|
||||
'vo': 'vol',
|
||||
'wa': 'wln',
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
'zu': 'zul',
|
||||
}
|
||||
|
||||
def __init__(self, downloader=None, subtitlesformat='srt'):
|
||||
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
|
||||
self._subformat = subtitlesformat
|
||||
|
||||
@classmethod
|
||||
def _conver_lang_code(cls, code):
|
||||
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
|
||||
return cls._lang_map.get(code[:2])
|
||||
|
||||
def run(self, information):
|
||||
if information['ext'] != u'mp4':
|
||||
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
|
||||
return True, information
|
||||
sub_langs = [key for key in information['subtitles']]
|
||||
|
||||
filename = information['filepath']
|
||||
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
|
||||
|
||||
opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
|
||||
lang_code = self._conver_lang_code(lang)
|
||||
if lang_code is not None:
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
opts.extend(['-f', 'mp4'])
|
||||
|
||||
temp_filename = filename + u'.temp'
|
||||
self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
|
||||
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
return True, information
|
||||
|
@@ -76,7 +76,7 @@ class YoutubeDL(object):
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
||||
subtitleslang: Language of the subtitles to download
|
||||
subtitleslangs: List of languages of the subtitles to download
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
@@ -264,7 +264,7 @@ class YoutubeDL(object):
|
||||
self.report_error(u'Erroneous output template')
|
||||
return None
|
||||
except ValueError as err:
|
||||
self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
|
||||
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
@@ -483,41 +483,28 @@ class YoutubeDL(object):
|
||||
self.report_error(u'Cannot write description file ' + descfn)
|
||||
return
|
||||
|
||||
if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub'),
|
||||
self.params.get('allsubtitles', False)])
|
||||
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitle = info_dict['subtitles'][0]
|
||||
(sub_error, sub_lang, sub) = subtitle
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat')
|
||||
if sub_error:
|
||||
self.report_warning("Some error while getting the subtitles")
|
||||
else:
|
||||
for sub_lang in subtitles.keys():
|
||||
sub = subtitles[sub_lang]
|
||||
if sub is None:
|
||||
continue
|
||||
try:
|
||||
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
self.report_writesubtitles(sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
subfile.write(sub)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat')
|
||||
for subtitle in subtitles:
|
||||
(sub_error, sub_lang, sub) = subtitle
|
||||
if sub_error:
|
||||
self.report_warning("Some error while getting the subtitles")
|
||||
else:
|
||||
try:
|
||||
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
self.report_writesubtitles(sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
self.report_writeinfojson(infofn)
|
||||
@@ -547,7 +534,7 @@ class YoutubeDL(object):
|
||||
try:
|
||||
success = self.fd._do_download(filename, info_dict)
|
||||
except (OSError, IOError) as err:
|
||||
raise UnavailableVideoError()
|
||||
raise UnavailableVideoError(err)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_error(u'unable to download video data: %s' % str(err))
|
||||
return
|
||||
@@ -594,7 +581,7 @@ class YoutubeDL(object):
|
||||
# No clear decision yet, let IE decide
|
||||
keep_video = keep_video_wish
|
||||
except PostProcessingError as e:
|
||||
self.to_stderr(u'ERROR: ' + e.msg)
|
||||
self.report_error(e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||
|
@@ -27,6 +27,7 @@ __authors__ = (
|
||||
'Johny Mo Swag',
|
||||
'Axel Noack',
|
||||
'Albert Kim',
|
||||
'Pierre Rudloff',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -82,6 +83,9 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
return "".join(opts)
|
||||
|
||||
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||
setattr(parser.values, option.dest, value.split(','))
|
||||
|
||||
def _find_term_columns():
|
||||
columns = os.environ.get('COLUMNS', None)
|
||||
if columns:
|
||||
@@ -119,6 +123,7 @@ def parseOpts(overrideArguments=None):
|
||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
@@ -185,27 +190,29 @@ def parseOpts(overrideArguments=None):
|
||||
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
|
||||
video_format.add_option('-F', '--list-formats',
|
||||
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
|
||||
video_format.add_option('--write-sub', '--write-srt',
|
||||
|
||||
subtitles.add_option('--write-sub', '--write-srt',
|
||||
action='store_true', dest='writesubtitles',
|
||||
help='write subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
|
||||
subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
|
||||
action='store_true', dest='writeautomaticsub',
|
||||
help='write automatic subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--only-sub',
|
||||
subtitles.add_option('--only-sub',
|
||||
action='store_true', dest='skip_download',
|
||||
help='[deprecated] alias of --skip-download', default=False)
|
||||
video_format.add_option('--all-subs',
|
||||
subtitles.add_option('--all-subs',
|
||||
action='store_true', dest='allsubtitles',
|
||||
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--list-subs',
|
||||
help='downloads all the available subtitles of the video', default=False)
|
||||
subtitles.add_option('--list-subs',
|
||||
action='store_true', dest='listsubtitles',
|
||||
help='lists all available subtitles for the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--sub-format',
|
||||
help='lists all available subtitles for the video', default=False)
|
||||
subtitles.add_option('--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT',
|
||||
help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
|
||||
video_format.add_option('--sub-lang', '--srt-lang',
|
||||
action='store', dest='subtitleslang', metavar='LANG',
|
||||
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
|
||||
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
|
||||
subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
|
||||
action='callback', dest='subtitleslang', metavar='LANGS', type='str',
|
||||
default=[], callback=_comma_separated_values_options_callback,
|
||||
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
|
||||
|
||||
downloader.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
|
||||
@@ -320,6 +327,8 @@ def parseOpts(overrideArguments=None):
|
||||
help='keeps the video file on disk after the post-processing; the video is erased by default')
|
||||
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
|
||||
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
|
||||
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
|
||||
help='embed subtitles in the video (only for mp4 videos)')
|
||||
|
||||
|
||||
parser.add_option_group(general)
|
||||
@@ -328,6 +337,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(subtitles)
|
||||
parser.add_option_group(authentication)
|
||||
parser.add_option_group(postproc)
|
||||
|
||||
@@ -398,6 +408,8 @@ def _real_main(argv=None):
|
||||
batchurls = batchfd.readlines()
|
||||
batchurls = [x.strip() for x in batchurls]
|
||||
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
|
||||
if opts.verbose:
|
||||
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
|
||||
except IOError:
|
||||
sys.exit(u'ERROR: batch file could not be read')
|
||||
all_urls = batchurls + args
|
||||
@@ -565,7 +577,7 @@ def _real_main(argv=None):
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslang': opts.subtitleslang,
|
||||
'subtitleslangs': opts.subtitleslang,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'max_downloads': opts.max_downloads,
|
||||
@@ -605,6 +617,8 @@ def _real_main(argv=None):
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
if opts.embedsubtitles:
|
||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
|
@@ -12,7 +12,7 @@ from .comedycentral import ComedyCentralIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
@@ -36,23 +36,31 @@ from .ign import IGNIE, OneUPIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .kankan import KankanIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mtv import MTVIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
@@ -67,10 +75,12 @@ from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .unistra import UnistraIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .vimeo import VimeoIE, VimeoChannelIE
|
||||
from .vine import VineIE
|
||||
from .c56 import C56IE
|
||||
from .wat import WatIE
|
||||
@@ -93,6 +103,8 @@ from .youtube import (
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
|
||||
"""
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
@@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_video(url, id, lang)
|
||||
|
||||
mobj = re.match(self._LIVEWEB_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, video_id) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
@@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
info_dict = {'id': player_info['VID'],
|
||||
'title': player_info['VTI'],
|
||||
'description': player_info['VDE'],
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
|
||||
'thumbnail': player_info['programImage'],
|
||||
'ext': 'flv',
|
||||
@@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
|
||||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
formats = filter(_match_lang, formats)
|
||||
# We order the formats by quality
|
||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||
# Prefer videos without subtitles in the same language
|
||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
|
||||
# Pick the best quality
|
||||
format_info = formats[-1]
|
||||
if format_info['mediaType'] == u'rtmp':
|
||||
@@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
def _extract_liveweb(self, url, name, lang):
|
||||
"""Extract form http://liveweb.arte.tv/"""
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||
video_id, u'Downloading information')
|
||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
event_doc = config_doc.find('event')
|
||||
url_node = event_doc.find('video').find('urlHd')
|
||||
if url_node is None:
|
||||
url_node = video_doc.find('urlSd')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||
'url': url_node.text.replace('MP4', 'mp4'),
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -4,15 +4,16 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
determine_ext,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
|
||||
u'file': u'3505939.mp4',
|
||||
u'md5': u'c51ca16b82bb456a4397987791a835f5',
|
||||
u'info_dict': {
|
||||
u'title': u'Font Conference',
|
||||
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
|
||||
info['description'] = videoNode.findall('./description')[0].text
|
||||
info['title'] = videoNode.findall('./caption')[0].text
|
||||
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
|
||||
manifest_url = videoNode.findall('./file')[0].text
|
||||
next_url = videoNode.findall('./file')[0].text
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
manifest_url += '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
else:
|
||||
# Old-style direct links
|
||||
info['url'] = next_url
|
||||
info['ext'] = determine_ext(info['url'])
|
||||
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
return [info]
|
||||
return info
|
||||
|
@@ -47,7 +47,8 @@ class InfoExtractor(object):
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location of the video.
|
||||
player_url: SWF Player URL (used for rtmpdump).
|
||||
subtitles: The subtitle file contents.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
@@ -77,7 +78,13 @@ class InfoExtractor(object):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
# This does not use has/getattr intentionally - we want to know whether
|
||||
# we have cached the regexp for *this* class, whereas getattr would also
|
||||
# match the superclass
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url) is not None
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
|
@@ -1,9 +1,12 @@
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -77,3 +80,31 @@ class DailymotionIE(InfoExtractor):
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
|
||||
|
||||
class DailymotionPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
|
||||
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
video_ids = []
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
|
||||
playlist_id, u'Downloading page %s' % pagenum)
|
||||
|
||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
for video_id in video_ids]
|
||||
return {'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': get_element_by_id(u'playlist_name', webpage),
|
||||
'entries': entries,
|
||||
}
|
||||
|
@@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
|
||||
IE_NAME = u'exfm'
|
||||
IE_DESC = u'ex.fm'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TEST = {
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'1bgtzg.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
|
||||
}
|
||||
}
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'95223130.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop - Miley Cyrus",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'upload_date': u'20130603',
|
||||
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
|
||||
},
|
||||
u'note': u'Soundcloud song',
|
||||
},
|
||||
{
|
||||
u'url': u'http://ex.fm/song/wddt8',
|
||||
u'file': u'wddt8.mp3',
|
||||
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
|
||||
u'info_dict': {
|
||||
u'title': u'Safe and Sound',
|
||||
u'uploader': u'Capital Cities',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
|
||||
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
|
||||
webpage = self._download_webpage(info_url, song_id)
|
||||
info = json.loads(webpage)
|
||||
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url'])
|
||||
if song_url is not None:
|
||||
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28"
|
||||
else:
|
||||
song_url = info['song']['url']
|
||||
song_url = info['song']['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
|
||||
return [{
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
|
@@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
|
||||
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
|
||||
webpage, u'video URL', flags=re.DOTALL)
|
||||
|
||||
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
|
||||
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
return [info]
|
||||
|
@@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
|
||||
return new_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url: return [self.url_result(new_url)]
|
||||
try:
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url:
|
||||
return [self.url_result(new_url)]
|
||||
except compat_urllib_error.HTTPError:
|
||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||
pass
|
||||
|
||||
video_id = url.split('/')[-1]
|
||||
try:
|
||||
@@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
"""Information Extractor for Ina.fr"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
||||
_TEST = {
|
||||
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
u'file': u'I12055569.mp4',
|
||||
|
47
youtube_dl/extractor/jeuxvideo.py
Normal file
47
youtube_dl/extractor/jeuxvideo.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
|
||||
|
||||
xml_link = m_download.group(1)
|
||||
|
||||
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
|
||||
|
||||
xml_config = self._download_webpage(xml_link, title,
|
||||
'Downloading XML config')
|
||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
||||
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
||||
info = json.loads(info)['versions'][0]
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
||||
return {'id': id,
|
||||
'title' : config.find('titre_video').text,
|
||||
'ext' : 'mp4',
|
||||
'url' : video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
}
|
37
youtube_dl/extractor/kankan.py
Normal file
37
youtube_dl/extractor/kankan.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class KankanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||
u'file': u'48863.flv',
|
||||
u'md5': u'29aca1e47ae68fc28804aca89f29507e',
|
||||
u'info_dict': {
|
||||
u'title': u'Ready To Go',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
|
||||
gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
|
||||
|
||||
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
|
||||
video_id, u'Downloading video url info')
|
||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
||||
video_url = 'http://%s%s' % (ip, path)
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
}
|
@@ -4,10 +4,10 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
|
||||
IE_NAME = u'keek'
|
||||
_TEST = {
|
||||
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
|
||||
u'file': u'NODfbab.mp4',
|
||||
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
|
||||
u'info_dict': {
|
||||
|
64
youtube_dl/extractor/muzu.py
Normal file
64
youtube_dl/extractor/muzu.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
u'file': u'1981454.mp4',
|
||||
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
u'info_dict': {
|
||||
u'title': u'Cat Walk (Original Mix)',
|
||||
u'description': u'md5:90e868994de201b2570e4e5854e19420',
|
||||
u'uploader': u'MarcAshken featuring SOS',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
info_data = compat_urllib_parse.urlencode({'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, u'Downloading video info')
|
||||
info = json.loads(video_info_page)
|
||||
|
||||
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, u'Downloading player info')
|
||||
video_info = json.loads(player_info_page)['videos'][0]
|
||||
for quality in ['1080' , '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
data = compat_urllib_parse.urlencode({'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, u'Downloading video url')
|
||||
video_url_info = json.loads(video_url_page)
|
||||
video_url = video_url_info['url']
|
||||
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
}
|
@@ -2,11 +2,13 @@ import binascii
|
||||
import base64
|
||||
import hashlib
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -16,7 +18,7 @@ from ..utils import (
|
||||
class MyVideoIE(InfoExtractor):
|
||||
"""Information Extractor for myvideo.de."""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
@@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
u'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
|
52
youtube_dl/extractor/ooyala.py
Normal file
52
youtube_dl/extractor/ooyala.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
class OoyalaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
|
||||
u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||
u'info_dict': {
|
||||
u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
|
||||
u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_result(self, info, more_info):
|
||||
return {'id': info['embedCode'],
|
||||
'ext': 'mp4',
|
||||
'title': unescapeHTML(info['title']),
|
||||
'url': info['url'],
|
||||
'description': unescapeHTML(more_info['description']),
|
||||
'thumbnail': more_info['promo'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
embedCode = mobj.group('id')
|
||||
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
|
||||
player = self._download_webpage(player_url, embedCode)
|
||||
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
||||
player, u'mobile player url')
|
||||
mobile_player = self._download_webpage(mobile_url, embedCode)
|
||||
videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
|
||||
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
|
||||
videos_info = json.loads(videos_info)
|
||||
videos_more_info =json.loads(videos_more_info)
|
||||
|
||||
if videos_more_info.get('lineup'):
|
||||
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
||||
return {'_type': 'playlist',
|
||||
'id': embedCode,
|
||||
'title': unescapeHTML(videos_more_info['title']),
|
||||
'entries': videos,
|
||||
}
|
||||
else:
|
||||
return self._extract_result(videos_info[0], videos_more_info)
|
||||
|
34
youtube_dl/extractor/pbs.py
Normal file
34
youtube_dl/extractor/pbs.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||
u'file': u'2365006249.mp4',
|
||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
u'info_dict': {
|
||||
u'title': u'A More Perfect Union',
|
||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
u'duration': 3190,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
info =json.loads(info_page)
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
}
|
49
youtube_dl/extractor/roxwel.py
Normal file
49
youtube_dl/extractor/roxwel.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate, determine_ext
|
||||
|
||||
|
||||
class RoxwelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
||||
u'file': u'passionpittakeawalklive.flv',
|
||||
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
|
||||
u'info_dict': {
|
||||
u'title': u'Take A Walk (live)',
|
||||
u'uploader': u'Passion Pit',
|
||||
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
||||
},
|
||||
u'skip': u'Requires rtmpdump',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
filename = mobj.group('filename')
|
||||
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
||||
info_page = self._download_webpage(info_url, filename,
|
||||
u'Downloading video info')
|
||||
|
||||
self.report_extraction(filename)
|
||||
info = json.loads(info_page)
|
||||
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
||||
best_rate = rtmp_rates[-1]
|
||||
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
||||
rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
|
||||
ext = determine_ext(rtmp_url)
|
||||
if ext == 'f4v':
|
||||
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
||||
|
||||
return {'id': filename,
|
||||
'title': info['title'],
|
||||
'url': rtmp_url,
|
||||
'ext': 'flv',
|
||||
'description': info['description'],
|
||||
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
||||
'uploader': info['artist'],
|
||||
'uploader_id': info['artistname'],
|
||||
'upload_date': unified_strdate(info['dbdate']),
|
||||
}
|
113
youtube_dl/extractor/rtlnow.py
Normal file
113
youtube_dl/extractor/rtlnow.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class RTLnowIE(InfoExtractor):
|
||||
"""Information Extractor for RTLnow, RTL2now and VOXnow"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
u'file': u'90419.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20070416',
|
||||
u'title': u'Ahornallee - Folge 1 - Der Einzug',
|
||||
u'description': u'Folge 1 - Der Einzug',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'Only works from Germany',
|
||||
},
|
||||
{
|
||||
u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
|
||||
u'file': u'69756.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20120519',
|
||||
u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
|
||||
u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
|
||||
u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
u'skip': u'Only works from Germany',
|
||||
},
|
||||
{
|
||||
u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
|
||||
u'file': u'13883.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20090627',
|
||||
u'title': u'Voxtours - Südafrika-Reporter II',
|
||||
u'description': u'Südafrika-Reporter II',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = u'http://' + mobj.group('url')
|
||||
video_page_url = u'http://' + mobj.group('base_url')
|
||||
video_id = mobj.group(u'video_id')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
note_m = re.search(r'''(?sx)
|
||||
<div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
|
||||
<div[ ]id="playerteaser">''', webpage)
|
||||
if note_m:
|
||||
msg = clean_html(note_m.group(1))
|
||||
raise ExtractorError(msg)
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
|
||||
webpage, u'title')
|
||||
playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
|
||||
webpage, u'playerdata_url')
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
|
||||
if mobj:
|
||||
video_description = mobj.group(u'description')
|
||||
if mobj.group('upload_date_Y'):
|
||||
video_upload_date = mobj.group('upload_date_Y')
|
||||
else:
|
||||
video_upload_date = u'20' + mobj.group('upload_date_y')
|
||||
video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
|
||||
else:
|
||||
video_description = None
|
||||
video_upload_date = None
|
||||
self._downloader.report_warning(u'Unable to extract description and upload date')
|
||||
|
||||
# Thumbnail: not every video has an thumbnail
|
||||
mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
|
||||
if mobj:
|
||||
video_thumbnail = mobj.group(u'thumbnail')
|
||||
else:
|
||||
video_thumbnail = None
|
||||
|
||||
mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract media URL')
|
||||
video_url = mobj.group(u'url')
|
||||
video_play_path = u'mp4:' + mobj.group(u'play_path')
|
||||
video_player_url = video_page_url + u'includes/vodplayer.swf'
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'page_url': video_page_url,
|
||||
'player_url': video_player_url,
|
||||
'ext': 'flv',
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
}]
|
23
youtube_dl/extractor/slashdot.py
Normal file
23
youtube_dl/extractor/slashdot.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
|
||||
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
|
||||
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
|
||||
u'info_dict': {
|
||||
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
|
||||
return self.url_result(ooyala_url, 'Ooyala')
|
@@ -4,6 +4,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
@@ -19,7 +20,12 @@ class SoundcloudIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$'
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
_TEST = {
|
||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
@@ -33,59 +39,68 @@ class SoundcloudIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Resolving id' % video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
@classmethod
|
||||
def _resolv_url(cls, url):
|
||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
|
||||
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
def _extract_info_dict(self, info, full_title=None):
|
||||
video_id = info['id']
|
||||
self.report_extraction(full_title)
|
||||
name = full_title or video_id
|
||||
self.report_extraction(name)
|
||||
|
||||
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
stream_json = self._download_webpage(streams_url, full_title,
|
||||
u'Downloading stream definitions',
|
||||
u'unable to download stream definitions')
|
||||
|
||||
streams = json.loads(stream_json)
|
||||
mediaURL = streams['http_mp3_128_url']
|
||||
upload_date = unified_strdate(info['created_at'])
|
||||
|
||||
return [{
|
||||
thumbnail = info['artwork_url']
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
return {
|
||||
'id': info['id'],
|
||||
'url': mediaURL,
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': upload_date,
|
||||
'upload_date': unified_strdate(info['created_at']),
|
||||
'title': info['title'],
|
||||
'ext': u'mp3',
|
||||
'description': info['description'],
|
||||
}]
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
class SoundcloudSetIE(InfoExtractor):
|
||||
"""Information extractor for soundcloud.com sets
|
||||
To access the media, the uid of the song and a stream token
|
||||
must be extracted from the page source and the script must make
|
||||
a request to media.soundcloud.com/crossdomain.xml. Then
|
||||
the media can be grabbed by requesting from an url composed
|
||||
of the stream token and uid
|
||||
"""
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
track_id = mobj.group('track_id')
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
elif mobj.group('widget'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||
else:
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
|
||||
info_json_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
return self._extract_info_dict(info, full_title)
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
IE_NAME = u'soundcloud:set'
|
||||
_TEST = {
|
||||
@@ -153,10 +168,6 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
]
|
||||
}
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Resolving id' % video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
@@ -171,7 +182,7 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
|
||||
videos = []
|
||||
@@ -182,23 +193,8 @@ class SoundcloudSetIE(InfoExtractor):
|
||||
return
|
||||
|
||||
self.report_extraction(full_title)
|
||||
for track in info['tracks']:
|
||||
video_id = track['id']
|
||||
|
||||
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
streams = json.loads(stream_json)
|
||||
mediaURL = streams['http_mp3_128_url']
|
||||
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': mediaURL,
|
||||
'uploader': track['user']['username'],
|
||||
'upload_date': unified_strdate(track['created_at']),
|
||||
'title': track['title'],
|
||||
'ext': u'mp3',
|
||||
'description': track['description'],
|
||||
})
|
||||
return videos
|
||||
return {'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track) for track in info['tracks']],
|
||||
'id': info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
|
@@ -5,13 +5,13 @@ from .common import InfoExtractor
|
||||
class StatigramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://statigr.am/p/484091715184808010_284179915',
|
||||
u'file': u'484091715184808010_284179915.mp4',
|
||||
u'md5': u'deda4ff333abe2e118740321e992605b',
|
||||
u'url': u'http://statigr.am/p/522207370455279102_24101272',
|
||||
u'file': u'522207370455279102_24101272.mp4',
|
||||
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
|
||||
u'info_dict': {
|
||||
u"uploader_id": u"videoseconds",
|
||||
u"title": u"Instagram photo by @videoseconds"
|
||||
}
|
||||
u'uploader_id': u'aguynamedpatrick',
|
||||
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||
|
||||
video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
|
||||
video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
|
||||
data, u'video URL')
|
||||
|
||||
return [{
|
||||
|
@@ -6,20 +6,17 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""
|
||||
TF1 uses the wat.tv player, currently it can only download videos with the
|
||||
html5 player enabled, it cannot download HD videos.
|
||||
"""
|
||||
_WORKING = False
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
|
||||
_TEST = {
|
||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
u'file': u'10635995.mp4',
|
||||
u'md5': u'66789d3e91278d332f75e1feb7aea327',
|
||||
u'md5': u'2e378cc28b9957607d5e88f274e637d8',
|
||||
u'info_dict': {
|
||||
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
||||
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
||||
}
|
||||
},
|
||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
32
youtube_dl/extractor/unistra.py
Normal file
32
youtube_dl/extractor/unistra.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
@@ -8,10 +8,10 @@ from ..utils import (
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
Accecps urls from vevo.com or in the format 'vevo:{id}'
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
@@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody To Die For"
|
||||
u"title": u"Somebody to Die For"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
video_info = json.loads(info_json)
|
||||
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
|
||||
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
|
||||
if m_urls is None or len(m_urls) == 0:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
# They are sorted from worst to best quality
|
||||
m_url = m_urls[-1]
|
||||
video_url = base_url + m_url.group('url')
|
||||
video_url = base_url + '/' + m_url.group('url')
|
||||
ext = m_url.group('ext')
|
||||
|
||||
return {'url': video_url,
|
||||
|
48
youtube_dl/extractor/videofyme.py
Normal file
48
youtube_dl/extractor/videofyme.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
class VideofyMeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
|
||||
IE_NAME = u'videofy.me'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
|
||||
u'file': u'1100701.mp4',
|
||||
u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
|
||||
u'info_dict': {
|
||||
u'title': u'This is VideofyMe',
|
||||
u'description': None,
|
||||
u'uploader': u'VideofyMe',
|
||||
u'uploader_id': u'thisisvideofyme',
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||
video_id)
|
||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
||||
video = config.find('video')
|
||||
sources = video.find('sources')
|
||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||
for key in ['on', 'av', 'off']] if node is not None)
|
||||
video_url = url_node.find('url').text
|
||||
|
||||
return {'id': video_id,
|
||||
'title': video.find('title').text,
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': video.find('thumb').text,
|
||||
'description': video.find('description').text,
|
||||
'uploader': config.find('blog/name').text,
|
||||
'uploader_id': video.find('identifier').text,
|
||||
'view_count': re.search(r'\d+', video.find('views').text).group(),
|
||||
}
|
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -19,18 +20,31 @@ class VimeoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TEST = {
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
u'file': u'56015672.mp4',
|
||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121220",
|
||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
u"uploader_id": u"user7108434",
|
||||
u"uploader": u"Filippo Valsorda",
|
||||
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
u'file': u'56015672.mp4',
|
||||
u'md5': u'8879b6cc097e987f02484baf890129e5',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121220",
|
||||
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
u"uploader_id": u"user7108434",
|
||||
u"uploader": u"Filippo Valsorda",
|
||||
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
|
||||
u'file': u'68093876.mp4',
|
||||
u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
|
||||
u'note': u'Vimeo Pro video (#1197)',
|
||||
u'info_dict': {
|
||||
u'uploader_id': u'openstreetmapus',
|
||||
u'uploader': u'OpenStreetMap US',
|
||||
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -82,7 +96,9 @@ class VimeoIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
if not mobj.group('proto'):
|
||||
url = 'https://' + url
|
||||
if mobj.group('direct_link') or mobj.group('pro'):
|
||||
elif mobj.group('pro'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
elif mobj.group('direct_link'):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
@@ -171,3 +187,31 @@ class VimeoIE(InfoExtractor):
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
}]
|
||||
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
|
||||
IE_NAME = u'vimeo:channel'
|
||||
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
video_ids = []
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
|
||||
channel_id, u'Downloading page %s' % pagenum)
|
||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
break
|
||||
|
||||
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
for video_id in video_ids]
|
||||
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
|
||||
webpage, u'channel title')
|
||||
return {'_type': 'playlist',
|
||||
'id': channel_id,
|
||||
'title': channel_title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@@ -12,17 +12,17 @@ from ..utils import (
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TEST = {
|
||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||
u'file': u'10631273.mp4',
|
||||
u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
|
||||
u'md5': u'd8b2231e1e333acd12aad94b80937e19',
|
||||
u'info_dict': {
|
||||
u'title': u'World War Z - Philadelphia VOST',
|
||||
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
||||
}
|
||||
},
|
||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
||||
}
|
||||
|
||||
def download_video_info(self, real_id):
|
||||
@@ -59,20 +59,8 @@ class WatIE(InfoExtractor):
|
||||
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
|
||||
'html5': '1'})
|
||||
player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
|
||||
real_id, u'Downloading player info')
|
||||
player = json.loads(player_info)['player']
|
||||
html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
|
||||
'html5 player')
|
||||
player_webpage = self._download_webpage(html5_player, real_id,
|
||||
u'Downloading player webpage')
|
||||
|
||||
video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
|
||||
'video url')
|
||||
info = {'id': real_id,
|
||||
'url': video_url,
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'ext': 'mp4',
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
|
@@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
|
||||
webpage_src = self._download_webpage(url, video_id)
|
||||
|
||||
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
||||
webpage_src)
|
||||
|
||||
if m_vevo_id is not None:
|
||||
self.to_screen(u'Vevo video detected:')
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
|
||||
video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
|
||||
webpage_src, u'video URL')
|
||||
|
||||
|
@@ -3,7 +3,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
unescapeHTML,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
|
||||
video_url = compat_urllib_parse.unquote(mobj.group('file'))
|
||||
else:
|
||||
video_url = mobj.group('server')+'/key='+mobj.group('file')
|
||||
video_extension = video_url.split('.')[-1]
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
|
||||
webpage, u'title')
|
||||
|
||||
# Can't see the description anywhere in the UI
|
||||
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
|
||||
# webpage, u'description', fatal=False)
|
||||
# if video_description: video_description = unescapeHTML(video_description)
|
||||
# Only a few videos have an description
|
||||
mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
|
||||
if mobj:
|
||||
video_description = unescapeHTML(mobj.group('description'))
|
||||
else:
|
||||
video_description = None
|
||||
|
||||
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
|
||||
if mobj:
|
||||
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'ext': determine_ext(video_url),
|
||||
'title': video_title,
|
||||
# 'description': video_description,
|
||||
'description': video_description,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'thumbnail': video_thumbnail
|
||||
|
@@ -23,8 +23,114 @@ from ..utils import (
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
class YoutubeIE(InfoExtractor):
|
||||
def report_lang(self):
|
||||
"""Report attempt to set language."""
|
||||
self.to_screen(u'Setting language')
|
||||
|
||||
def _set_language(self):
|
||||
request = compat_urllib_request.Request(self._LANG_URL)
|
||||
try:
|
||||
self.report_lang()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
||||
return False
|
||||
return True
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return False
|
||||
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
try:
|
||||
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
||||
return False
|
||||
|
||||
galx = None
|
||||
dsh = None
|
||||
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
galx = match.group(1)
|
||||
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
dsh = match.group(1)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
|
||||
u'Email': username,
|
||||
u'GALX': galx,
|
||||
u'Passwd': password,
|
||||
u'PersistentCookie': u'yes',
|
||||
u'_utf8': u'霱',
|
||||
u'bgresponse': u'js_disabled',
|
||||
u'checkConnection': u'',
|
||||
u'checkedDomains': u'youtube',
|
||||
u'dnConn': u'',
|
||||
u'dsh': dsh,
|
||||
u'pstMsg': u'0',
|
||||
u'rmShown': u'1',
|
||||
u'secTok': u'',
|
||||
u'signIn': u'Sign in',
|
||||
u'timeStmp': u'',
|
||||
u'service': u'youtube',
|
||||
u'uilel': u'3',
|
||||
u'hl': u'en_US',
|
||||
}
|
||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||
# chokes on unicode
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||
try:
|
||||
self.report_login()
|
||||
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
return False
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
return False
|
||||
return True
|
||||
|
||||
def _confirm_age(self):
|
||||
age_form = {
|
||||
'next_url': '/',
|
||||
'action_confirm': 'Confirm',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
||||
try:
|
||||
self.report_age_confirmation()
|
||||
compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
||||
return True
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
if not self._set_language():
|
||||
return
|
||||
if not self._login():
|
||||
return
|
||||
self._confirm_age()
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com'
|
||||
_VALID_URL = r"""^
|
||||
(
|
||||
@@ -35,7 +141,7 @@ class YoutubeIE(InfoExtractor):
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
v=
|
||||
@@ -45,14 +151,27 @@ class YoutubeIE(InfoExtractor):
|
||||
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$"""
|
||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# Listed in order of quality
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
|
||||
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
|
||||
'95', '94', '93', '92', '132', '151',
|
||||
'85', '102', '84', '101', '83', '100', '82',
|
||||
# Dash video
|
||||
'138', '248', '137', '247', '136', '246', '245',
|
||||
'244', '135', '243', '134', '242', '133', '160',
|
||||
# Dash audio
|
||||
'172', '141', '171', '140', '139',
|
||||
]
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
'17': 'mp4',
|
||||
@@ -64,6 +183,47 @@ class YoutubeIE(InfoExtractor):
|
||||
'44': 'webm',
|
||||
'45': 'webm',
|
||||
'46': 'webm',
|
||||
|
||||
# 3d videos
|
||||
'82': 'mp4',
|
||||
'83': 'mp4',
|
||||
'84': 'mp4',
|
||||
'85': 'mp4',
|
||||
'100': 'webm',
|
||||
'101': 'webm',
|
||||
'102': 'webm',
|
||||
|
||||
# videos that use m3u8
|
||||
'92': 'mp4',
|
||||
'93': 'mp4',
|
||||
'94': 'mp4',
|
||||
'95': 'mp4',
|
||||
'96': 'mp4',
|
||||
'132': 'mp4',
|
||||
'151': 'mp4',
|
||||
|
||||
# Dash mp4
|
||||
'133': 'mp4',
|
||||
'134': 'mp4',
|
||||
'135': 'mp4',
|
||||
'136': 'mp4',
|
||||
'137': 'mp4',
|
||||
'138': 'mp4',
|
||||
'139': 'mp4',
|
||||
'140': 'mp4',
|
||||
'141': 'mp4',
|
||||
'160': 'mp4',
|
||||
|
||||
# Dash webm
|
||||
'171': 'webm',
|
||||
'172': 'webm',
|
||||
'242': 'webm',
|
||||
'243': 'webm',
|
||||
'244': 'webm',
|
||||
'245': 'webm',
|
||||
'246': 'webm',
|
||||
'247': 'webm',
|
||||
'248': 'webm',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'5': '240x400',
|
||||
@@ -80,7 +240,69 @@ class YoutubeIE(InfoExtractor):
|
||||
'44': '480x854',
|
||||
'45': '720x1280',
|
||||
'46': '1080x1920',
|
||||
'82': '360p',
|
||||
'83': '480p',
|
||||
'84': '720p',
|
||||
'85': '1080p',
|
||||
'92': '240p',
|
||||
'93': '360p',
|
||||
'94': '480p',
|
||||
'95': '720p',
|
||||
'96': '1080p',
|
||||
'100': '360p',
|
||||
'101': '480p',
|
||||
'102': '720p',
|
||||
'132': '240p',
|
||||
'151': '72p',
|
||||
'133': '240p',
|
||||
'134': '360p',
|
||||
'135': '480p',
|
||||
'136': '720p',
|
||||
'137': '1080p',
|
||||
'138': '>1080p',
|
||||
'139': '48k',
|
||||
'140': '128k',
|
||||
'141': '256k',
|
||||
'160': '192p',
|
||||
'171': '128k',
|
||||
'172': '256k',
|
||||
'242': '240p',
|
||||
'243': '360p',
|
||||
'244': '480p',
|
||||
'245': '480p',
|
||||
'246': '480p',
|
||||
'247': '720p',
|
||||
'248': '1080p',
|
||||
}
|
||||
_special_itags = {
|
||||
'82': '3D',
|
||||
'83': '3D',
|
||||
'84': '3D',
|
||||
'85': '3D',
|
||||
'100': '3D',
|
||||
'101': '3D',
|
||||
'102': '3D',
|
||||
'133': 'DASH Video',
|
||||
'134': 'DASH Video',
|
||||
'135': 'DASH Video',
|
||||
'136': 'DASH Video',
|
||||
'137': 'DASH Video',
|
||||
'138': 'DASH Video',
|
||||
'139': 'DASH Audio',
|
||||
'140': 'DASH Audio',
|
||||
'141': 'DASH Audio',
|
||||
'160': 'DASH Video',
|
||||
'171': 'DASH Audio',
|
||||
'172': 'DASH Audio',
|
||||
'242': 'DASH Video',
|
||||
'243': 'DASH Video',
|
||||
'244': 'DASH Video',
|
||||
'245': 'DASH Video',
|
||||
'246': 'DASH Video',
|
||||
'247': 'DASH Video',
|
||||
'248': 'DASH Video',
|
||||
}
|
||||
|
||||
IE_NAME = u'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -114,7 +336,7 @@ class YoutubeIE(InfoExtractor):
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
|
||||
u"uploader": u"IconaPop",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
}
|
||||
},
|
||||
@@ -130,6 +352,21 @@ class YoutubeIE(InfoExtractor):
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
|
||||
u'file': u'TGi3HqYrWHE.mp4',
|
||||
u'note': u'm3u8 video',
|
||||
u'info_dict': {
|
||||
u'title': u'Triathlon - Men - London 2012 Olympic Games',
|
||||
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
|
||||
u'uploader': u'olympic',
|
||||
u'upload_date': u'20120807',
|
||||
u'uploader_id': u'olympic',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -139,10 +376,6 @@ class YoutubeIE(InfoExtractor):
|
||||
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def report_lang(self):
|
||||
"""Report attempt to set language."""
|
||||
self.to_screen(u'Setting language')
|
||||
|
||||
def report_video_webpage_download(self, video_id):
|
||||
"""Report attempt to download video webpage."""
|
||||
self.to_screen(u'%s: Downloading video webpage' % video_id)
|
||||
@@ -183,37 +416,55 @@ class YoutubeIE(InfoExtractor):
|
||||
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
|
||||
elif len(s) == 90:
|
||||
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
|
||||
elif len(s) == 89:
|
||||
return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
|
||||
elif len(s) == 88:
|
||||
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
|
||||
elif len(s) == 87:
|
||||
return s[4:23] + s[86] + s[24:85]
|
||||
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
||||
elif len(s) == 86:
|
||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
|
||||
elif len(s) == 85:
|
||||
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
|
||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||
elif len(s) == 84:
|
||||
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
|
||||
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
|
||||
elif len(s) == 83:
|
||||
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
|
||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||
elif len(s) == 82:
|
||||
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
|
||||
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
|
||||
elif len(s) == 81:
|
||||
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
|
||||
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
elif len(s) == 80:
|
||||
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
|
||||
elif len(s) == 79:
|
||||
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
||||
|
||||
def _decrypt_signature_age_gate(self, s):
|
||||
# The videos with age protection use another player, so the algorithms
|
||||
# can be different.
|
||||
if len(s) == 86:
|
||||
return s[2:63] + s[82] + s[64:82] + s[63]
|
||||
else:
|
||||
# Fallback to the other algortihms
|
||||
return self._decrypt_signature(s)
|
||||
|
||||
|
||||
def _get_available_subtitles(self, video_id):
|
||||
self.report_video_subtitles_download(video_id)
|
||||
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
|
||||
try:
|
||||
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
return (u'unable to download video subtitles: %s' % compat_str(err), None)
|
||||
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
|
||||
return {}
|
||||
sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
||||
sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
|
||||
if not sub_lang_list:
|
||||
return (u'video doesn\'t have subtitles', None)
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
def _list_available_subtitles(self, video_id):
|
||||
@@ -222,8 +473,7 @@ class YoutubeIE(InfoExtractor):
|
||||
|
||||
def _request_subtitle(self, sub_lang, sub_name, video_id, format):
|
||||
"""
|
||||
Return tuple:
|
||||
(error_message, sub_lang, sub)
|
||||
Return the subtitle as a string or None if they are not found
|
||||
"""
|
||||
self.report_video_subtitles_request(video_id, sub_lang, format)
|
||||
params = compat_urllib_parse.urlencode({
|
||||
@@ -236,21 +486,24 @@ class YoutubeIE(InfoExtractor):
|
||||
try:
|
||||
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
|
||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
if not sub:
|
||||
return (u'Did not fetch video subtitles', None, None)
|
||||
return (None, sub_lang, sub)
|
||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||
return
|
||||
return sub
|
||||
|
||||
def _request_automatic_caption(self, video_id, webpage):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
sub_lang = self._downloader.params.get('subtitleslang') or 'en'
|
||||
sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||
err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
|
||||
if mobj is None:
|
||||
return [(err_msg, None, None)]
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
player_config = json.loads(mobj.group(1))
|
||||
try:
|
||||
args = player_config[u'args']
|
||||
@@ -265,131 +518,51 @@ class YoutubeIE(InfoExtractor):
|
||||
})
|
||||
subtitles_url = caption_url + '&' + params
|
||||
sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
|
||||
return [(None, sub_lang, sub)]
|
||||
except KeyError:
|
||||
return [(err_msg, None, None)]
|
||||
return {sub_lang: sub}
|
||||
# An extractor error can be raise by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
except (KeyError, ExtractorError):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _extract_subtitle(self, video_id):
|
||||
def _extract_subtitles(self, video_id):
|
||||
"""
|
||||
Return a list with a tuple:
|
||||
[(error_message, sub_lang, sub)]
|
||||
Return a dictionary: {language: subtitles} or {} if the subtitles
|
||||
couldn't be found
|
||||
"""
|
||||
sub_lang_list = self._get_available_subtitles(video_id)
|
||||
available_subs_list = self._get_available_subtitles(video_id)
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
|
||||
return [(sub_lang_list[0], None, None)]
|
||||
if self._downloader.params.get('subtitleslang', False):
|
||||
sub_lang = self._downloader.params.get('subtitleslang')
|
||||
elif 'en' in sub_lang_list:
|
||||
sub_lang = 'en'
|
||||
if not available_subs_list: #There was some error, it didn't get the available subtitles
|
||||
return {}
|
||||
if self._downloader.params.get('allsubtitles', False):
|
||||
sub_lang_list = available_subs_list
|
||||
else:
|
||||
sub_lang = list(sub_lang_list.keys())[0]
|
||||
if not sub_lang in sub_lang_list:
|
||||
return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
|
||||
if self._downloader.params.get('subtitleslangs', False):
|
||||
reqested_langs = self._downloader.params.get('subtitleslangs')
|
||||
elif 'en' in available_subs_list:
|
||||
reqested_langs = ['en']
|
||||
else:
|
||||
reqested_langs = [list(available_subs_list.keys())[0]]
|
||||
|
||||
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
|
||||
return [subtitle]
|
||||
|
||||
def _extract_all_subtitles(self, video_id):
|
||||
sub_lang_list = self._get_available_subtitles(video_id)
|
||||
sub_format = self._downloader.params.get('subtitlesformat')
|
||||
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
|
||||
return [(sub_lang_list[0], None, None)]
|
||||
subtitles = []
|
||||
sub_lang_list = {}
|
||||
for sub_lang in reqested_langs:
|
||||
if not sub_lang in available_subs_list:
|
||||
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
|
||||
continue
|
||||
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
|
||||
subtitles = {}
|
||||
for sub_lang in sub_lang_list:
|
||||
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
|
||||
subtitles.append(subtitle)
|
||||
if subtitle:
|
||||
subtitles[sub_lang] = subtitle
|
||||
return subtitles
|
||||
|
||||
def _print_formats(self, formats):
|
||||
print('Available formats:')
|
||||
for x in formats:
|
||||
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
|
||||
# Set language
|
||||
request = compat_urllib_request.Request(self._LANG_URL)
|
||||
try:
|
||||
self.report_lang()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
return
|
||||
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
try:
|
||||
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
galx = None
|
||||
dsh = None
|
||||
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
galx = match.group(1)
|
||||
|
||||
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
|
||||
if match:
|
||||
dsh = match.group(1)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
|
||||
u'Email': username,
|
||||
u'GALX': galx,
|
||||
u'Passwd': password,
|
||||
u'PersistentCookie': u'yes',
|
||||
u'_utf8': u'霱',
|
||||
u'bgresponse': u'js_disabled',
|
||||
u'checkConnection': u'',
|
||||
u'checkedDomains': u'youtube',
|
||||
u'dnConn': u'',
|
||||
u'dsh': dsh,
|
||||
u'pstMsg': u'0',
|
||||
u'rmShown': u'1',
|
||||
u'secTok': u'',
|
||||
u'signIn': u'Sign in',
|
||||
u'timeStmp': u'',
|
||||
u'service': u'youtube',
|
||||
u'uilel': u'3',
|
||||
u'hl': u'en_US',
|
||||
}
|
||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||
# chokes on unicode
|
||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||
try:
|
||||
self.report_login()
|
||||
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
return
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
# Confirm age
|
||||
age_form = {
|
||||
'next_url': '/',
|
||||
'action_confirm': 'Confirm',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
||||
try:
|
||||
self.report_age_confirmation()
|
||||
compat_urllib_request.urlopen(request).read().decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
||||
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
|
||||
self._video_dimensions.get(x, '???'),
|
||||
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@@ -398,6 +571,57 @@ class YoutubeIE(InfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _get_video_url_list(self, url_map):
|
||||
"""
|
||||
Transform a dictionary in the format {itag:url} to a list of (itag, url)
|
||||
with the requested formats.
|
||||
"""
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
return video_url_list
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
url_map = {}
|
||||
def _get_urls(_manifest):
|
||||
lines = _manifest.split('\n')
|
||||
urls = filter(lambda l: l and not l.startswith('#'),
|
||||
lines)
|
||||
return urls
|
||||
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
|
||||
formats_urls = _get_urls(manifest)
|
||||
for format_url in formats_urls:
|
||||
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
|
||||
url_map[itag] = format_url
|
||||
return url_map
|
||||
|
||||
def _real_extract(self, url):
|
||||
if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
|
||||
self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
|
||||
@@ -521,25 +745,10 @@ class YoutubeIE(InfoExtractor):
|
||||
# subtitles
|
||||
video_subtitles = None
|
||||
|
||||
if self._downloader.params.get('writesubtitles', False):
|
||||
video_subtitles = self._extract_subtitle(video_id)
|
||||
if video_subtitles:
|
||||
(sub_error, sub_lang, sub) = video_subtitles[0]
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('writeautomaticsub', False):
|
||||
if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
|
||||
video_subtitles = self._extract_subtitles(video_id)
|
||||
elif self._downloader.params.get('writeautomaticsub', False):
|
||||
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
|
||||
(sub_error, sub_lang, sub) = video_subtitles[0]
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('allsubtitles', False):
|
||||
video_subtitles = self._extract_all_subtitles(video_id)
|
||||
for video_subtitle in video_subtitles:
|
||||
(sub_error, sub_lang, sub) = video_subtitle
|
||||
if sub_error:
|
||||
self._downloader.report_warning(sub_error)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id)
|
||||
@@ -552,7 +761,6 @@ class YoutubeIE(InfoExtractor):
|
||||
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
|
||||
|
||||
# Decide which formats to download
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
|
||||
try:
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
||||
@@ -566,6 +774,17 @@ class YoutubeIE(InfoExtractor):
|
||||
if m_s is not None:
|
||||
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||
m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
|
||||
if m_s is not None:
|
||||
if 'url_encoded_fmt_stream_map' in video_info:
|
||||
video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
|
||||
else:
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
|
||||
elif 'adaptive_fmts' in video_info:
|
||||
if 'url_encoded_fmt_stream_map' in video_info:
|
||||
video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
|
||||
else:
|
||||
video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
@@ -587,8 +806,8 @@ class YoutubeIE(InfoExtractor):
|
||||
s = url_data['s'][0]
|
||||
if age_gate:
|
||||
player_version = self._search_regex(r'ad3-(.+?)\.swf',
|
||||
video_info['ad3_module'][0], 'flash player',
|
||||
fatal=False)
|
||||
video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
|
||||
'flash player', fatal=False)
|
||||
player = 'flash player %s' % player_version
|
||||
else:
|
||||
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
|
||||
@@ -596,41 +815,25 @@ class YoutubeIE(InfoExtractor):
|
||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
|
||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||
(len(s), parts_sizes, url_data['itag'][0], player))
|
||||
signature = self._decrypt_signature(url_data['s'][0])
|
||||
encrypted_sig = url_data['s'][0]
|
||||
if age_gate:
|
||||
signature = self._decrypt_signature_age_gate(encrypted_sig)
|
||||
else:
|
||||
signature = self._decrypt_signature(encrypted_sig)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
@@ -639,8 +842,9 @@ class YoutubeIE(InfoExtractor):
|
||||
# Extension
|
||||
video_extension = self._video_extensions.get(format_param, 'flv')
|
||||
|
||||
video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
|
||||
self._video_dimensions.get(format_param, '???'))
|
||||
video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
|
||||
self._video_dimensions.get(format_param, '???'),
|
||||
' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
|
||||
|
||||
results.append({
|
||||
'id': video_id,
|
||||
@@ -670,10 +874,10 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
|
||||
.*
|
||||
|
|
||||
((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
|
||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
|
||||
_MAX_RESULTS = 50
|
||||
@@ -692,11 +896,14 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
|
||||
# Download playlist videos from API
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
page_num = 1
|
||||
videos = []
|
||||
|
||||
while True:
|
||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
|
||||
for page_num in itertools.count(1):
|
||||
start_index = self._MAX_RESULTS * (page_num - 1) + 1
|
||||
if start_index >= 1000:
|
||||
self._downloader.report_warning(u'Max number of results reached')
|
||||
break
|
||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
|
||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||
|
||||
try:
|
||||
@@ -716,10 +923,6 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
if 'media$group' in entry and 'media$player' in entry['media$group']:
|
||||
videos.append((index, entry['media$group']['media$player']['url']))
|
||||
|
||||
if len(response['feed']['entry']) < self._MAX_RESULTS:
|
||||
break
|
||||
page_num += 1
|
||||
|
||||
videos = [v[1] for v in sorted(videos)]
|
||||
|
||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
||||
@@ -762,9 +965,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
|
||||
# Download any subsequent channel pages using the json-based channel_ajax query
|
||||
if self._MORE_PAGES_INDICATOR in page:
|
||||
while True:
|
||||
pagenum = pagenum + 1
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
@@ -807,9 +1008,8 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# all of them.
|
||||
|
||||
video_ids = []
|
||||
pagenum = 0
|
||||
|
||||
while True:
|
||||
for pagenum in itertools.count(0):
|
||||
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
|
||||
|
||||
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
|
||||
@@ -834,8 +1034,6 @@ class YoutubeUserIE(InfoExtractor):
|
||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||
break
|
||||
|
||||
pagenum += 1
|
||||
|
||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
||||
return [self.playlist_result(url_results, playlist_title = username)]
|
||||
@@ -898,33 +1096,30 @@ class YoutubeShowIE(InfoExtractor):
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
"""
|
||||
Base class for extractors that fetch info from
|
||||
http://www.youtube.com/feed_ajax
|
||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||
"""
|
||||
_LOGIN_REQUIRED = True
|
||||
_PAGING_STEP = 30
|
||||
|
||||
# Overwrite YoutubeIE properties we don't want
|
||||
_TESTS = []
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
# use action_load_personal_feed instead of action_load_system_feed
|
||||
_PERSONAL_FEED = False
|
||||
|
||||
@property
|
||||
def _FEED_TEMPLATE(self):
|
||||
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
|
||||
action = 'action_load_system_feed'
|
||||
if self._PERSONAL_FEED:
|
||||
action = 'action_load_personal_feed'
|
||||
return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return u'youtube:%s' % self._FEED_NAME
|
||||
|
||||
def _real_initialize(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
|
||||
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_entries = []
|
||||
@@ -936,7 +1131,7 @@ class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||
u'Downloading page %s' % i)
|
||||
info = json.loads(info)
|
||||
feed_html = info['feed_html']
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||
ids = orderedSet(m.group(1) for m in m_ids)
|
||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||
if info['paging'] is None:
|
||||
@@ -954,3 +1149,22 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
|
||||
_FEED_NAME = 'recommended'
|
||||
_PLAYLIST_TITLE = u'Youtube Recommended videos'
|
||||
|
||||
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
||||
IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
|
||||
_FEED_NAME = 'watch_later'
|
||||
_PLAYLIST_TITLE = u'Youtube Watch Later'
|
||||
_PAGING_STEP = 100
|
||||
_PERSONAL_FEED = True
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
IE_NAME = u'youtube:favorites'
|
||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
|
||||
_LOGIN_REQUIRED = True
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
|
||||
playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
|
||||
return self.url_result(playlist_id, 'YoutubePlaylist')
|
||||
|
@@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z]+$', key)
|
||||
assert re.match(r'^[a-zA-Z@]*$', val)
|
||||
assert re.match(r'^[a-zA-Z@\s]*$', val)
|
||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||
return node.find(expr)
|
||||
else:
|
||||
@@ -497,7 +497,7 @@ class ExtractorError(Exception):
|
||||
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
expected = True
|
||||
if not expected:
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
|
||||
super(ExtractorError, self).__init__(msg)
|
||||
|
||||
self.traceback = tb
|
||||
@@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
|
||||
else:
|
||||
return default_ext
|
||||
|
||||
def subtitles_filename(filename, sub_lang, sub_format):
|
||||
return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
|
||||
|
||||
def date_from_str(date_str):
|
||||
"""
|
||||
Return a datetime object from a string in the format YYYYMMDD or
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.07.24.1'
|
||||
__version__ = '2013.08.23'
|
||||
|
Reference in New Issue
Block a user