Compare commits


155 Commits

Author SHA1 Message Date
Philipp Hagemeister
5f3544baa3 release 2014.12.10.2 2014-12-10 14:39:06 +01:00
Philipp Hagemeister
da27660014 [youtube] Pass in all variables to DASH manifest (Fixes #4424) 2014-12-10 14:39:00 +01:00
Philipp Hagemeister
b8a6114309 release 2014.12.10.1 2014-12-10 13:21:49 +01:00
Philipp Hagemeister
774e208f94 [youtube] Handle missing DASH manifest (Fixes #4421, fixes #4420) 2014-12-10 13:21:24 +01:00
Philipp Hagemeister
f20b52778b release 2014.12.10 2014-12-10 12:21:40 +01:00
Jaime Marquínez Ferrándiz
83e865a370 Fix PEP8 issue E713 2014-12-09 23:11:26 +01:00
Sergey M․
b89a938687 [bet] Add extractor (Closes #4416) 2014-12-09 22:29:01 +06:00
Sergey M․
e89a2aabed [extractor/common] Add generic SMIL formats extraction routine 2014-12-09 22:28:28 +06:00
Philipp Hagemeister
f58766ce5c [extractor/common] Document ie_key in url results 2014-12-09 10:58:06 +01:00
Philipp Hagemeister
15644a40df Merge pull request #4395 from cryptonaut/issue2883
Handle --get-url with merged formats (fixes #2883)
2014-12-08 17:21:56 +01:00
Philipp Hagemeister
d4800f3c3f Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-08 17:17:31 +01:00
Philipp Hagemeister
09a5dd2d3b [bliptv] Add support for audio-only files (Fixes #4404) 2014-12-08 17:17:22 +01:00
Sergey M․
819039ee63 [tvigle] Update test and modernize 2014-12-08 22:03:02 +06:00
Jaime Marquínez Ferrándiz
603c92080f [nhl] Make sure we add '_sd' before the extension (fixes #4397)
'.replace' would find the first dot in the path.
2014-12-07 11:26:07 +01:00
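The fix above is a classic extension-handling pitfall. A minimal sketch of the idea (the helper name is hypothetical; the real change lives in the NHL extractor):

    import os.path

    def insert_suffix_before_ext(path, suffix='_sd'):
        # os.path.splitext splits at the *last* dot, so the suffix lands
        # directly before the real extension even when the path contains
        # other dots earlier on.
        root, ext = os.path.splitext(path)
        return root + suffix + ext

    # The naive approach the commit message warns about:
    # '/videos/v1.2/clip.mp4'.replace('.', '_sd.', 1)
    # yields '/videos/v1_sd.2/clip.mp4' -- the first dot, not the extension.
    print(insert_suffix_before_ext('/videos/v1.2/clip.mp4'))  # /videos/v1.2/clip_sd.mp4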
cryptonaut
16ae61f655 Handle --get-url with merged formats (fixes #2883)
Outputs one URL per line
2014-12-06 12:55:07 -08:00
Sergey M․
0ef4d4ab7e Credit @akretz for prosiebensat1 playlist support (#4394) 2014-12-07 01:48:45 +06:00
Sergey M․
4542535f94 Merge branch 'akretz-master' 2014-12-07 01:47:09 +06:00
Sergey M․
6a52eed80e [prosiebensat1] Improve and simplify 2014-12-07 01:46:44 +06:00
Sergey M․
acf5cbfe93 [extractor/common] Add description to playlist_result 2014-12-07 01:46:30 +06:00
Adrian Kretz
8d1c8cae9c [prosiebensat1] Fix broken tests 2014-12-06 19:21:05 +01:00
Adrian Kretz
c84890f708 [prosiebensat1] Add support for playlists (fixes #4357) 2014-12-06 19:05:22 +01:00
Sergey M․
6d0886204a [radio.de] Add support for radio.de websites (Closes #4393) 2014-12-06 23:01:52 +06:00
Sergey M․
04d02a9d57 [twitch] Add login support (#3986) 2014-12-06 21:24:20 +06:00
Philipp Hagemeister
b82f815f37 Allow iterators for playlist result entries 2014-12-06 14:02:19 +01:00
Philipp Hagemeister
158f8cadc0 [adultswim] PEP8 2014-12-06 14:01:59 +01:00
Philipp Hagemeister
7d70cf4157 [nba] Remove unused import 2014-12-06 13:59:37 +01:00
Philipp Hagemeister
6591fdf51f [tagesschau] Look at the right place for download links 2014-12-06 13:59:10 +01:00
Philipp Hagemeister
47d7c64274 [test_utils] Make test more realistic (#4377) 2014-12-06 12:36:23 +01:00
Philipp Hagemeister
db175341c7 Credit @cryptonaut for adultswim (#4388) 2014-12-06 12:32:01 +01:00
Philipp Hagemeister
9ff6772790 [youtube] Modernize 2014-12-06 12:20:54 +01:00
Philipp Hagemeister
5f9b83944d [ffmpeg] Improve version check and call it from hls (Fixes #4377) 2014-12-06 12:14:26 +01:00
Philipp Hagemeister
f6735be4da Merge remote-tracking branch 'cryptonaut/adultswim' 2014-12-06 11:55:24 +01:00
Philipp Hagemeister
6a3e0103bb [nba] Add test for #4387 2014-12-06 11:26:17 +01:00
Philipp Hagemeister
0b5cc1983e [nba] Modernize 2014-12-06 11:15:25 +01:00
cryptonaut
1a9f8b1ad4 [nba] Improve _VALID_URL regex (fixes #4387)
Allows for optional trailing / or /index.html
2014-12-06 01:49:22 -08:00
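The improvement amounts to making a trailing '/' or '/index.html' optional at the end of the pattern. A hedged sketch against a made-up domain (this is not the actual NBA regex):

    import re

    VALID_URL = r'https?://(?:www\.)?example\.com/video/(?P<id>[^/?#]+)(?:/(?:index\.html)?)?$'

    for url in ('https://www.example.com/video/abc',
                'https://www.example.com/video/abc/',
                'https://www.example.com/video/abc/index.html'):
        print(re.match(VALID_URL, url).group('id'))  # 'abc' every time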
cryptonaut
7115599121 [adultswim] Updated to work with new site format (fixes #4317) 2014-12-05 21:55:47 -08:00
Philipp Hagemeister
0df23ba9f9 release 2014.12.06.1 2014-12-06 00:48:34 +01:00
Philipp Hagemeister
58daf5ebed [youporn] Fix JSON parameter regexp (Fixes #4384) 2014-12-06 00:48:29 +01:00
Philipp Hagemeister
1a7c6c69d3 release 2014.12.06 2014-12-06 00:43:04 +01:00
Philipp Hagemeister
045c48847a [tagesschau] Add support for sendung (Fixes #4378) 2014-12-06 00:42:43 +01:00
Sergey M․
90644a6843 [azubu] Add extractor (Closes #4379) 2014-12-05 22:08:30 +06:00
Philipp Hagemeister
122c2f87c1 [tagesschau] Modernize 2014-12-05 10:59:55 +01:00
Philipp Hagemeister
a154eb3d15 release 2014.12.04.2 2014-12-04 17:43:39 +01:00
Philipp Hagemeister
81028ff9eb [xminus] Capture description (#4300) 2014-12-04 17:43:34 +01:00
Philipp Hagemeister
e8df5cee12 [minhateca] Fix duration parsing 2014-12-04 17:35:40 +01:00
Philipp Hagemeister
ab07963b5c release 2014.12.04.1 2014-12-04 17:02:23 +01:00
Philipp Hagemeister
7e26084d09 Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-04 17:02:14 +01:00
Philipp Hagemeister
4349c07dd7 [minhateca] Add extractor (Fixes #4094) 2014-12-04 17:02:05 +01:00
Sergey M․
1139a54d9b [foxnews] Add extractor (Closes #4352) 2014-12-04 21:19:08 +06:00
Sergey M․
b128c9ed68 [vine:user] Add support for another URL format (Closes #4365) 2014-12-04 20:12:06 +06:00
Philipp Hagemeister
9776bc7f57 release 2014.12.04 2014-12-04 08:34:12 +01:00
Philipp Hagemeister
e703fc66c2 Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/extractor/audiomack.py
2014-12-04 08:33:37 +01:00
Philipp Hagemeister
39c52bbd32 [myvidster] Enforce age limit in test 2014-12-04 08:31:55 +01:00
Philipp Hagemeister
6219802165 Merge remote-tracking branch 'zackfern/myvidster' 2014-12-04 08:30:22 +01:00
Philipp Hagemeister
8b97115358 Credit @zackfern for foxgay (#4371) 2014-12-04 08:28:41 +01:00
Philipp Hagemeister
810fb84d5e pep8 and minor beautification all around 2014-12-04 08:27:40 +01:00
Philipp Hagemeister
5f5e993dc6 [bbccouk] Remove unused import 2014-12-04 08:22:53 +01:00
Philipp Hagemeister
191cc41ba4 [foxgay] Add thumbnail to test definition 2014-12-04 08:22:20 +01:00
Jaime Marquínez Ferrándiz
abe70fa044 [audiomack] Modernize test definition 2014-12-04 08:21:29 +01:00
Philipp Hagemeister
7f142293df Merge remote-tracking branch 'zackfern/foxgay' 2014-12-04 08:20:01 +01:00
Philipp Hagemeister
d4e06d4a83 [options] Standardize mentioned configuration file location (Fixes #4367) 2014-12-04 07:57:18 +01:00
Zack Fernandes
ecd7ea1e6b [myvidster] Added support for Myvidster 2014-12-03 22:22:36 -08:00
Zack Fernandes
b92c548693 [foxgay] Initial support 2014-12-03 20:22:48 -08:00
Tithen-Firion
eecd6a467d [vgtv] Update tests 2014-12-04 01:34:24 +01:00
Philipp Hagemeister
dce2a3cf9e [break] Remove md5sum from test 2014-12-04 01:33:30 +01:00
Tithen-Firion
9095aa38ac [audiomack] Update test 2014-12-04 00:42:01 +01:00
Tithen-Firion
0403b06985 [soundcloud] Improve _VALID_URL
Add support for links from Audiomack
2014-12-04 00:42:01 +01:00
Sergey M․
de9bd74bc2 [ted] Fix type_watch links extraction 2014-12-03 21:17:11 +06:00
Jaime Marquínez Ferrándiz
233d37fb6b [brightcove] Make sure that the 'ext' variable is set (fixes #4360) 2014-12-03 13:25:49 +01:00
Philipp Hagemeister
c627f7d48c release 2014.12.03 2014-12-03 12:15:34 +01:00
Jaime Marquínez Ferrándiz
163c8babaa [nhl] Simplify 2014-12-03 00:08:26 +01:00
Jaime Marquínez Ferrándiz
6708542099 Merge branch 'master' of https://github.com/akretz/youtube-dl 2014-12-03 00:00:05 +01:00
Jaime Marquínez Ferrándiz
ea2ee40357 [nhl.com:videocenter] Don't match url with 'id=*' before 'catid' in the query
Since the order in which extractors are added is not defined, it would match instead of NHLIE.
2014-12-02 23:56:30 +01:00
Adrian Kretz
62d8b56655 [nhl] Support videos which don't have mp4-extension (fixes #4348) 2014-12-02 23:26:37 +01:00
Sergey M․
c492970b4b [rts] Improve _VALID_URL 2014-12-02 22:24:47 +06:00
Sergey M․
ac5633592a [24video] Add extractor (Closes #4350) 2014-12-02 22:23:23 +06:00
Sergey M․
706d7d4ee7 [YoutubeDL] Avoid negative timestamps on Windows 2014-12-02 21:18:07 +06:00
Sergey M․
752c8c9b76 [rts] Improve _VALID_URL 2014-12-02 20:53:19 +06:00
Sergey M․
b1399a144d [rts] Add support for the new URL format and extract display id (Closes #4349) 2014-12-02 20:45:43 +06:00
Jaime Marquínez Ferrándiz
05177b34a6 [rutube] Extract m3u8 formats (fixes #3984) 2014-12-01 18:20:36 +01:00
Jaime Marquínez Ferrándiz
c41a9650c3 [youtube] Extract framerate from the dash manifest
Not all videos have 60 fps, for example they can have 48 fps.
2014-12-01 17:36:12 +01:00
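For context, a DASH manifest advertises the frame rate per representation, which is why 48 fps streams should not be assumed to be 60 fps. A minimal parsing sketch, assuming a frameRate attribute on the Representation nodes (the manifest snippet is fabricated):

    import xml.etree.ElementTree as ET

    MPD = '''<MPD xmlns="urn:mpeg:DASH:schema:MPD:2011">
      <Period><AdaptationSet>
        <Representation id="299" frameRate="48" bandwidth="4500000"/>
        <Representation id="135" frameRate="30000/1001" bandwidth="1100000"/>
      </AdaptationSet></Period>
    </MPD>'''

    ns = {'mpd': 'urn:mpeg:DASH:schema:MPD:2011'}
    for rep in ET.fromstring(MPD).findall('.//mpd:Representation', ns):
        rate = rep.get('frameRate')
        num, _, den = rate.partition('/')  # frame rates may be fractions
        fps = int(num) / int(den or 1)
        print(rep.get('id'), fps)  # 299 48.0, then 135 29.97...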
Philipp Hagemeister
df015c69ea release 2014.12.01 2014-12-01 17:28:34 +01:00
Naglis Jonaitis
1434bffa1f [tunein] Use station API 2014-12-01 18:10:15 +02:00
Jaime Marquínez Ferrándiz
94aa25b995 Credit @Tithen-Firion for the myspace changes (#4341) 2014-12-01 16:15:09 +01:00
Sergey M․
d128cfe393 [slideshare] Fix description extraction 2014-12-01 20:18:42 +06:00
Jaime Marquínez Ferrándiz
954f36f890 [myspace] Cleanup 2014-12-01 00:10:12 +01:00
Jaime Marquínez Ferrándiz
19e92770c9 [myspace] Replace removed test video and fix the others 2014-12-01 00:10:12 +01:00
Tithen-Firion
95c673a148 [myspace] Add extractor for albums 2014-12-01 00:10:12 +01:00
Tithen-Firion
a196a53265 [myspace] Update tests 2014-12-01 00:10:12 +01:00
Tithen-Firion
3266f0c68e [myspace] Redirect to other extractors
There are many songs just linked from Vevo/YouTube to MySpace.
Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041
YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426
2014-12-01 00:10:12 +01:00
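The mechanism behind such redirects is InfoExtractor.url_result, which defers the URL to another extractor. A hedged sketch with hypothetical names and regexes (not the code from this commit):

    from .common import InfoExtractor

    class SongPageIE(InfoExtractor):  # hypothetical extractor
        _VALID_URL = r'https?://songs\.example/(?P<id>[^/]+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)
            # If the page merely embeds a YouTube player, hand the URL to
            # the YouTube extractor instead of scraping the embed here.
            youtube_url = self._search_regex(
                r'data-youtube-url="([^"]+)"', webpage, 'youtube url',
                default=None)
            if youtube_url:
                return self.url_result(youtube_url, 'Youtube')
            raise NotImplementedError('fall back to normal extraction here')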
Tithen-Firion
1940fadd53 [myspace] Handle non-playable songs
I'm adding this because sometimes there is a song page, but you cannot play it.
Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439
It will be useful for downloading a whole album that contains songs like this.
2014-12-01 00:10:11 +01:00
Tithen-Firion
03fd72d996 [myspace] Add more data to info dict
`uploader` is an artist
`playlist` is an album
2014-12-01 00:10:11 +01:00
Tithen-Firion
f2b44a2513 [myspace] Use player_url for faster download
It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me.
2014-12-01 00:10:11 +01:00
Jaime Marquínez Ferrándiz
c522adb1f0 [youtube] Add a normal age-gate test video 2014-11-30 21:45:49 +01:00
Jaime Marquínez Ferrándiz
7160532d41 [youtube] Simplify code for getting the dash manifest url
video_info now contains the 'ytplayer.config.args' dictionary
2014-11-30 21:07:50 +01:00
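A rough sketch of what reusing that dictionary looks like (the regex and sample page are simplified, not the extractor's exact code):

    import json
    import re

    def extract_player_config(webpage):
        # Regex simplified; the real extractor's pattern is stricter.
        m = re.search(r'ytplayer\.config\s*=\s*({.+?});', webpage)
        return json.loads(m.group(1)) if m else None

    page = 'var ytplayer = {};ytplayer.config = {"args": {"title": "demo"}};'
    config = extract_player_config(page)
    video_info = config['args']   # the same dict get_video_info used to provide
    print(video_info['title'])    # demo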
Jaime Marquínez Ferrándiz
4e62ebe250 [youtube] Try to extract the video_info from the webpage before requesting the 'get_video_info' pages
The YouTube player doesn't seem to use them except for embedded videos, so we can skip a network request.
But they still provide better error messages (for removed videos, for example).
2014-11-30 20:56:32 +01:00
Jaime Marquínez Ferrándiz
4472f84f0c [test/test_subtitles] Update checksum for vimeo subtitle file 2014-11-30 19:42:54 +01:00
Jaime Marquínez Ferrándiz
b766eb2707 [youtube] Update test 2014-11-30 19:18:39 +01:00
Jaime Marquínez Ferrándiz
10a404c335 [youtube] Add format 313 (fixes #4339) 2014-11-30 18:56:14 +01:00
Sergey M․
c056efa2e3 [bbccouk] Fix extraction (#4104, #4214) 2014-11-30 22:37:56 +06:00
Philipp Hagemeister
283ac8d592 Merge pull request #4338 from t0mm0/x-minus-fix
[xminus] update tkn extraction regex
2014-11-30 17:11:05 +01:00
t0mm0
313d4572ce [xminus] update tkn extraction regex 2014-11-30 16:04:04 +00:00
Jaime Marquínez Ferrándiz
42939b6129 [youtube] Use a cookie for setting the language
This way, we don't have to do an additional request
2014-11-30 00:03:59 +01:00
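A minimal sketch of pre-seeding such a cookie with the standard library (Python 3 module names; the cookie value here is an assumption, not taken from the commit):

    from http.cookiejar import Cookie, CookieJar

    def make_pref_cookie():
        # Pre-baked language preference for .youtube.com so every request
        # already asks for English, with no extra round trip.
        return Cookie(
            version=0, name='PREF', value='hl=en', port=None,
            port_specified=False, domain='.youtube.com',
            domain_specified=True, domain_initial_dot=True, path='/',
            path_specified=True, secure=False, expires=None, discard=False,
            comment=None, comment_url=None, rest={})

    jar = CookieJar()
    jar.set_cookie(make_pref_cookie())
    print([c.name for c in jar])  # ['PREF']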
Jaime Marquínez Ferrándiz
37ea8164d3 [youtube] Don't confirm age when initializing
It seems that all the videos with age restriction now use the age gate method, which doesn't require any confirmation.
2014-11-29 23:46:39 +01:00
Jaime Marquínez Ferrándiz
8c810a7db3 Merge pull request #4333 from ymln/bliptv-fixes
[bliptv] Fix some videos not downloading
2014-11-29 20:20:45 +01:00
Yuriy Melnyk
248a0b890f [bliptv] Fix \n\n at the end of real_url
See https://github.com/rg3/youtube-dl/issues/3544#issuecomment-53166516
2014-11-29 19:17:56 +02:00
Yuriy Melnyk
96b7c7fe3f [bliptv] Fix resolution of lookup id in some videos
In some videos (for example, http://blip.tv/play/gbk766dkj4Yn) resolving the
lookup id would fail, because the page at
http://blip.tv/play/gbk766dkj4Yn.x?p=1 has no "config.id" in
it. Fixed by requesting a different URL and inspecting the URL to which the
client is redirected.
2014-11-29 19:17:56 +02:00
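A sketch of the described fallback (the play URL format comes from the commit message; the helper and its use of geturl() are illustrative):

    import urllib.request

    def resolve_lookup_id(lookup_id):
        req = urllib.request.Request(
            'http://blip.tv/play/%s.x?p=1' % lookup_id, method='HEAD')
        with urllib.request.urlopen(req) as res:
            # urlopen follows the redirect chain; geturl() is the final
            # URL, which carries the real video id in its query string.
            return res.geturl()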
Sergey M․
e987e91fcc [playvid] Capture and output error message 2014-11-29 22:16:35 +06:00
Sergey M․
cb6444e197 [noco] Add support for multi language videos (Closes #4326) 2014-11-28 20:38:47 +06:00
Philipp Hagemeister
93b8a10e3b release 2014.11.27 2014-11-27 15:44:49 +01:00
Philipp Hagemeister
4207558e8b [buzzfeed] Add support for more video types (#4259) 2014-11-27 15:44:35 +01:00
Philipp Hagemeister
ad0d800fc3 release 2014.11.26.4 2014-11-26 22:53:02 +01:00
Philipp Hagemeister
e232f787f6 [buzzfeed] Add new extractor (Fixes #4259) 2014-11-26 22:52:52 +01:00
Philipp Hagemeister
155f9550c0 [test/helper] Fix newlines in output of missing test fields 2014-11-26 22:52:28 +01:00
Philipp Hagemeister
72476fcc42 release 2014.11.26.3 2014-11-26 22:08:30 +01:00
Philipp Hagemeister
29e950f7c8 release 2014.11.26.2 2014-11-26 22:06:27 +01:00
Philipp Hagemeister
7c8ea53b96 release 2014.11.26.1 2014-11-26 22:01:06 +01:00
Philipp Hagemeister
dcddc10a50 [test_unicode_literals] Arm unicode_literals check
From now on, the line

from __future__ import unicode_literals

should be contained in every single Python file lest we run into any more 2.x/3.x issues.
Going forward, we're likely to develop on 3.x only and would likely miss subtle bugs otherwise.
2014-11-26 20:01:22 +01:00
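A simplified standalone version of the armed check (the ignore list mirrors the test file shown further down; everything else is illustrative):

    import io
    import os
    import re

    IGNORED = {'setup.py', 'conf.py', 'buildserver.py'}
    FUTURE_IMPORT = re.compile(
        r'from __future__ import (?:[a-z_]+,\s*)*unicode_literals')

    def files_missing_unicode_literals(root):
        for dirpath, _, filenames in os.walk(root):
            for name in filenames:
                if not name.endswith('.py') or name in IGNORED:
                    continue
                path = os.path.join(dirpath, name)
                with io.open(path, encoding='utf-8') as f:
                    code = f.read()
                # Files without any string literals don't need the import
                if "'" not in code and '"' not in code:
                    continue
                if not FUTURE_IMPORT.search(code):
                    yield path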
Sergey M․
a1008af412 [gorillavid] Update IE_DESC 2014-11-27 00:24:19 +06:00
Sergey M․
61c0663c1e [udemy] Generalize download json and fix login 2014-11-26 21:25:43 +06:00
Sergey M․
81a7a521c5 [gorillavid] Remove unused import 2014-11-26 21:02:46 +06:00
Sergey M․
e293711802 [udemy] Set session cookies to API requests (Closes #4124, closes #4219, closes #4308) 2014-11-26 21:00:18 +06:00
Sergey M․
ceb3367320 [gorillavid] Generalize extraction with countdown timeout and support faststream.in (Closes #4297) 2014-11-26 20:02:40 +06:00
Philipp Hagemeister
a03aaaed2e Declare Python 3.2 compatibility 2014-11-26 13:08:42 +01:00
Philipp Hagemeister
e075a44afb [tests] Remove useless u prefixes 2014-11-26 13:07:32 +01:00
Philipp Hagemeister
8865bdeb37 Remove useless u prefixes 2014-11-26 13:06:02 +01:00
Philipp Hagemeister
3aa578cad2 [ffmpeg] Modernize 2014-11-26 13:05:49 +01:00
Philipp Hagemeister
d3b5101a91 [videopremium] Modernize 2014-11-26 13:03:22 +01:00
Philipp Hagemeister
5c32110114 [videofyme] Modernize 2014-11-26 13:01:39 +01:00
Philipp Hagemeister
24144e3b8d [tvp] Modernize 2014-11-26 12:58:53 +01:00
Philipp Hagemeister
b3034f9df7 [trilulilu] Modernize 2014-11-26 12:56:43 +01:00
Philipp Hagemeister
4c6d2ff8dc [sohu] Modernize 2014-11-26 12:53:55 +01:00
Philipp Hagemeister
faf3494894 [redtube] Modernize 2014-11-26 12:52:45 +01:00
Philipp Hagemeister
535a66ef66 [muzu] Modernize 2014-11-26 12:50:37 +01:00
Philipp Hagemeister
5c40bba82f [hotnewhiphop] Modernize 2014-11-26 12:45:40 +01:00
Philipp Hagemeister
855dc479c2 [subtitles] Modernize 2014-11-26 12:43:06 +01:00
Philipp Hagemeister
0792d5634e [youtube] Remove useless u prefixes 2014-11-26 12:41:53 +01:00
Philipp Hagemeister
e91cdcae1a [appletrailers] Modernize 2014-11-26 12:41:24 +01:00
Philipp Hagemeister
27e1400f55 [aparat] Modernize 2014-11-26 12:40:51 +01:00
Philipp Hagemeister
e0938e7731 [addanime] Modernize 2014-11-26 12:40:05 +01:00
Philipp Hagemeister
b72823a0a4 [francetv] PEP8 2014-11-26 12:38:20 +01:00
Philipp Hagemeister
673cf0e773 [update] Remove useless import 2014-11-26 12:37:45 +01:00
Philipp Hagemeister
f8aace93cd [academicearth] Modernize 2014-11-26 12:35:57 +01:00
Philipp Hagemeister
80310134e0 [mplayer] Modernize 2014-11-26 12:34:52 +01:00
Philipp Hagemeister
4d2d638df4 [http] Modernize 2014-11-26 12:27:36 +01:00
Philipp Hagemeister
0e44f90e18 [hls] Remove useless u prefixes 2014-11-26 12:26:21 +01:00
Philipp Hagemeister
15938ab67a [update] Modernize 2014-11-26 12:24:57 +01:00
Philipp Hagemeister
ab4ee31eb1 [utils] remove useless u prefix 2014-11-26 11:50:22 +01:00
Philipp Hagemeister
b061ea6e9f [compat] Beautify assertion 2014-11-26 11:48:09 +01:00
Philipp Hagemeister
4aae94f9d0 [YoutubeDL] Remove incorrect documentation 2014-11-26 11:25:43 +01:00
Philipp Hagemeister
acda92f6bc Clarify --no-playlist documentation (Closes #4309) 2014-11-26 10:51:03 +01:00
Philipp Hagemeister
ddfd0f2727 release 2014.11.26 2014-11-26 10:46:12 +01:00
Philipp Hagemeister
d0720e7118 Merge branch 'master' of github.com:rg3/youtube-dl 2014-11-26 10:45:57 +01:00
Philipp Hagemeister
4e262a8838 [generic] Detect direct video links (Fixes #4149, #4313) 2014-11-26 10:44:39 +01:00
Sergey M․
b9ed3af343 [tass] Add extractor (Closes #4296) 2014-11-25 22:24:33 +06:00
103 changed files with 2166 additions and 797 deletions

View File

@@ -88,3 +88,7 @@ Dao Hoang Son
 Oskar Jauch
 Matthew Rayfield
 t0mm0
+Tithen-Firion
+Zack Fernandes
+cryptonaut
+Adrian Kretz

View File

@@ -30,7 +30,7 @@ Alternatively, refer to the developer instructions below for how to check out an
 # DESCRIPTION
 **youtube-dl** is a small command-line program to download videos from
 YouTube.com and a few more sites. It requires the Python interpreter, version
-2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
+2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
 your Unix box, on Windows or on Mac OS X. It is released to the public domain,
 which means you can modify it, redistribute it or use it however you like.
@@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.
                                      this is not possible instead of searching.
     --ignore-config                  Do not read configuration files. When given
                                      in the global configuration file /etc
-                                     /youtube-dl.conf: do not read the user
-                                     configuration in ~/.config/youtube-dl.conf
-                                     (%APPDATA%/youtube-dl/config.txt on
-                                     Windows)
+                                     /youtube-dl.conf: Do not read the user
+                                     configuration in ~/.config/youtube-
+                                     dl/config (%APPDATA%/youtube-dl/config.txt
+                                     on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
@@ -93,7 +93,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      COUNT views
     --max-views COUNT                Do not download any videos with more than
                                      COUNT views
-    --no-playlist                    download only the currently playing video
+    --no-playlist                    If the URL refers to a video and a
+                                     playlist, download only the video.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the

View File

@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
+
 import os
 from os.path import dirname as dirn
 import sys

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 """
 This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import json
 import sys

View File

@@ -1,4 +1,6 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
+
 import hashlib
 import urllib.request
 import json

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals, with_statement
 
 import rsa
 import json
@@ -29,4 +30,5 @@ signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).enc
 print('signature: ' + signature)
 
 versions_info['signature'] = signature
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
+with open('update/versions.json', 'w') as versionsf:
+    json.dump(versions_info, versionsf, indent=4, sort_keys=True)

View File

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
 
 import datetime
 import glob
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
 for fn in glob.glob('*.html*'):
     with io.open(fn, encoding='utf-8') as f:
         content = f.read()
-    newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
+    newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
     if content != newc:
         tmpFn = fn + '.part'
         with io.open(tmpFn, 'wt', encoding='utf-8') as outf:

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import datetime
 import io

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+from __future__ import unicode_literals
 
 import sys
 import os

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import io
 import sys
 import re

View File

@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import io
 import os.path

View File

@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
+
 import os
 from os.path import dirname as dirn
 import sys

View File

@@ -102,7 +102,9 @@ setup(
         "Programming Language :: Python :: 2.6",
         "Programming Language :: Python :: 2.7",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.3"
+        "Programming Language :: Python :: 3.2",
+        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
     ],
     **params

View File

@@ -141,7 +141,7 @@ def expect_info_dict(self, expected_dict, got_dict):
     if missing_keys:
         def _repr(v):
             if isinstance(v, compat_str):
-                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
+                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
             else:
                 return repr(v)
         info_dict_str = ''.join(

View File

@@ -97,7 +97,7 @@ def generator(test_case):
             return
         for other_ie in other_ies:
             if not other_ie.working():
-                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                 return
 
         params = get_params(test_case.get('params', {}))
@@ -143,7 +143,7 @@ def generator(test_case):
                     raise
                 if try_num == RETRIES:
-                    report_warning(u'Failed due to network errors, skipping...')
+                    report_warning('Failed due to network errors, skipping...')
                     return
 
                 print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))

View File

@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
     def test_subtitles(self):
         self.DL.params['writesubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
 
     def test_subtitles_lang(self):
         self.DL.params['writesubtitles'] = True

View File

@@ -9,14 +9,13 @@ rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 IGNORED_FILES = [
     'setup.py',  # http://bugs.python.org/issue13943
+    'conf.py',
+    'buildserver.py',
 ]
 
 
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
-        print('Skipping this test (not yet fully implemented)')
-        return
-
         for dirpath, _, filenames in os.walk(rootDir):
             for basename in filenames:
                 if not basename.endswith('.py'):
@@ -30,10 +29,10 @@ class TestUnicodeLiterals(unittest.TestCase):
                 if "'" not in code and '"' not in code:
                     continue
-                imps = 'from __future__ import unicode_literals'
-                self.assertTrue(
-                    imps in code,
-                    ' %s missing in %s' % (imps, fn))
+                self.assertRegexpMatches(
+                    code,
+                    r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
+                    'unicode_literals import missing in %s' % fn)
 
                 m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
                 if m is not None:

View File

@@ -48,6 +48,7 @@ from youtube_dl.utils import (
     intlist_to_bytes,
     args_to_str,
     parse_filesize,
+    version_tuple,
 )
@@ -171,7 +172,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
     def test_smuggle_url(self):
-        data = {u"ö": u"ö", u"abc": [3]}
+        data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
         smug_url = smuggle_url(url, data)
         unsmug_url, unsmug_data = unsmuggle_url(smug_url)
@@ -220,6 +221,9 @@
         self.assertEqual(parse_duration('0s'), 0)
         self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
         self.assertEqual(parse_duration('T30M38S'), 1838)
+        self.assertEqual(parse_duration('5 s'), 5)
+        self.assertEqual(parse_duration('3 min'), 180)
+        self.assertEqual(parse_duration('2.5 hours'), 9000)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
@@ -376,6 +380,12 @@
         self.assertEqual(parse_filesize('2 MiB'), 2097152)
         self.assertEqual(parse_filesize('5 GB'), 5000000000)
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
+        self.assertEqual(parse_filesize('1,24 KB'), 1240)
+
+    def test_version_tuple(self):
+        self.assertEqual(version_tuple('1'), (1,))
+        self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
+        self.assertEqual(version_tuple('10.1-6'), (10, 1, 6))  # avconv style
 
 if __name__ == '__main__':
     unittest.main()

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os

View File

@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os
@@ -32,7 +33,7 @@ params = get_params({
 TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.info.json'
 DESCRIPTION_FILE = TEST_ID + '.mp4.description'
-EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
+EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
 test URL: https://github.com/rg3/youtube-dl/issues/1892
 
 This is a test video for youtube-dl.
@@ -53,11 +54,11 @@ class TestInfoJSON(unittest.TestCase):
         self.assertTrue(os.path.exists(INFO_JSON_FILE))
         with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
             jd = json.load(jsonf)
-        self.assertEqual(jd['upload_date'], u'20121002')
+        self.assertEqual(jd['upload_date'], '20121002')
         self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
         self.assertEqual(jd['id'], TEST_ID)
         self.assertEqual(jd['extractor'], 'youtube')
-        self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''')
+        self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
         self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
 
         self.assertTrue(os.path.exists(DESCRIPTION_FILE))

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 # Allow direct execution
 import os

View File

@@ -7,6 +7,7 @@ import collections
 import datetime
 import errno
 import io
+import itertools
 import json
 import locale
 import os
@@ -654,21 +655,28 @@
         if playlistend == -1:
             playlistend = None
 
-        if isinstance(ie_result['entries'], list):
-            n_all_entries = len(ie_result['entries'])
-            entries = ie_result['entries'][playliststart:playlistend]
+        ie_entries = ie_result['entries']
+        if isinstance(ie_entries, list):
+            n_all_entries = len(ie_entries)
+            entries = ie_entries[playliststart:playlistend]
             n_entries = len(entries)
             self.to_screen(
                 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
-        else:
-            assert isinstance(ie_result['entries'], PagedList)
-            entries = ie_result['entries'].getslice(
+        elif isinstance(ie_entries, PagedList):
+            entries = ie_entries.getslice(
                 playliststart, playlistend)
             n_entries = len(entries)
             self.to_screen(
                 "[%s] playlist %s: Downloading %d videos" %
                 (ie_result['extractor'], playlist, n_entries))
+        else:  # iterable
+            entries = list(itertools.islice(
+                ie_entries, playliststart, playlistend))
+            n_entries = len(entries)
+            self.to_screen(
+                "[%s] playlist %s: Downloading %d videos" %
+                (ie_result['extractor'], playlist, n_entries))
 
         for i, entry in enumerate(entries, 1):
             self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -787,6 +795,10 @@
             info_dict['display_id'] = info_dict['id']
 
         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+            # Working around negative timestamps in Windows
+            # (see http://bugs.python.org/issue1646728)
+            if info_dict['timestamp'] < 0 and os.name == 'nt':
+                info_dict['timestamp'] = 0
             upload_date = datetime.datetime.utcfromtimestamp(
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
@@ -930,8 +942,12 @@
         if self.params.get('forceid', False):
             self.to_stdout(info_dict['id'])
         if self.params.get('forceurl', False):
-            # For RTMP URLs, also include the playpath
-            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+            if info_dict.get('requested_formats') is not None:
+                for f in info_dict['requested_formats']:
+                    self.to_stdout(f['url'] + f.get('play_path', ''))
+            else:
+                # For RTMP URLs, also include the playpath
+                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
             self.to_stdout(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
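The third branch added above exists because a plain iterator has no len() and cannot be sliced, so neither the list nor the PagedList path applies. A toy illustration of why itertools.islice is the right tool:

    import itertools

    def entry_generator():
        # Stand-in for an extractor that yields entries lazily
        n = 0
        while True:
            n += 1
            yield {'id': 'video%d' % n}

    # islice consumes only the requested window of the (endless) generator
    entries = list(itertools.islice(entry_generator(), 0, 3))
    print([e['id'] for e in entries])  # ['video1', 'video2', 'video3']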

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+from __future__ import unicode_literals
 
 # Execute with
 # $ python youtube_dl/__main__.py (2.6+)

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
 
 import base64

View File

@@ -247,7 +247,7 @@ else:
             userhome = compat_getenv('HOME')
         elif 'USERPROFILE' in os.environ:
             userhome = compat_getenv('USERPROFILE')
-        elif not 'HOMEPATH' in os.environ:
+        elif 'HOMEPATH' not in os.environ:
             return path
         else:
             try:
@@ -270,7 +270,7 @@ if sys.version_info < (3, 0):
         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 else:
     def compat_print(s):
-        assert type(s) == type(u'')
+        assert isinstance(s, compat_str)
         print(s)

View File

@@ -4,6 +4,7 @@ import os
 import re
 import subprocess
 
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
 from ..utils import (
     compat_urlparse,
@@ -28,14 +29,17 @@ class HlsFD(FileDownloader):
             if check_executable(program, ['-version']):
                 break
         else:
-            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         cmd = [program] + args
 
+        ffpp = FFmpegPostProcessor(downloader=self)
+        ffpp.check_version()
+
         retval = subprocess.call(cmd)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
@@ -45,8 +49,8 @@ class HlsFD(FileDownloader):
             })
             return True
         else:
-            self.to_stderr(u"\n")
-            self.report_error(u'%s exited with code %d' % (program, retval))
+            self.to_stderr('\n')
+            self.report_error('%s exited with code %d' % (program, retval))
             return False

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import time
@@ -106,7 +108,7 @@ class HttpFD(FileDownloader):
                 self.report_retry(count, retries)
 
         if count > retries:
-            self.report_error(u'giving up after %s retries' % retries)
+            self.report_error('giving up after %s retries' % retries)
             return False
 
         data_len = data.info().get('Content-length', None)
@@ -124,10 +126,10 @@
         min_data_len = self.params.get("min_filesize", None)
         max_data_len = self.params.get("max_filesize", None)
         if min_data_len is not None and data_len < min_data_len:
-            self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+            self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
             return False
         if max_data_len is not None and data_len > max_data_len:
-            self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+            self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
             return False
 
         data_len_str = format_bytes(data_len)
@@ -151,13 +153,13 @@
                     filename = self.undo_temp_name(tmpfilename)
                     self.report_destination(filename)
                 except (OSError, IOError) as err:
-                    self.report_error(u'unable to open for writing: %s' % str(err))
+                    self.report_error('unable to open for writing: %s' % str(err))
                     return False
             try:
                 stream.write(data_block)
             except (IOError, OSError) as err:
-                self.to_stderr(u"\n")
-                self.report_error(u'unable to write data: %s' % str(err))
+                self.to_stderr('\n')
+                self.report_error('unable to write data: %s' % str(err))
                 return False
             if not self.params.get('noresizebuffer', False):
                 block_size = self.best_block_size(after - before, len(data_block))
@@ -188,10 +190,10 @@
             self.slow_down(start, byte_counter - resume_len)
 
         if stream is None:
-            self.to_stderr(u"\n")
-            self.report_error(u'Did not get any data blocks')
+            self.to_stderr('\n')
+            self.report_error('Did not get any data blocks')
             return False
-        if tmpfilename != u'-':
+        if tmpfilename != '-':
             stream.close()
         self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:

View File

@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
 import os
 import subprocess
 
 from .common import FileDownloader
+from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
     encodeFilename,
 )
@@ -13,19 +16,23 @@ class MplayerFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
-        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
+        args = [
+            'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+            '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
         try:
-            subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            subprocess.call(
+                ['mplayer', '-h'],
+                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
         except (OSError, IOError):
-            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
+            self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
             return False
 
         # Download using mplayer.
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
@@ -35,6 +42,6 @@
             })
             return True
         else:
-            self.to_stderr(u"\n")
-            self.report_error(u'mplayer exited with code %d' % retval)
+            self.to_stderr('\n')
+            self.report_error('mplayer exited with code %d' % retval)
             return False

View File

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from .abc import ABCIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
@@ -22,11 +24,13 @@ from .arte import (
 )
 from .audiomack import AudiomackIE
 from .auengine import AUEngineIE
+from .azubu import AzubuIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
@@ -36,6 +40,7 @@ from .bpb import BpbIE
 from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
 from .canal13cl import Canal13clIE
@@ -118,6 +123,8 @@ from .fktv import (
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
+from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
@@ -213,6 +220,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
+from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
@@ -239,9 +247,10 @@ from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
-from .myspace import MySpaceIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .myvidster import MyVidsterIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -299,6 +308,7 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .radiode import RadioDeIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
@@ -373,6 +383,7 @@ from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
 from .tagesschau import TagesschauIE
 from .tapely import TapelyIE
+from .tass import TassIE
 from .teachertube import (
     TeacherTubeIE,
     TeacherTubeUserIE,
@@ -413,6 +424,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
+from .twentyfourvideo import TwentyFourVideoIE
 from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (

View File

@@ -1,4 +1,5 @@
 from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -18,15 +19,14 @@ class AcademicEarthCourseIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        playlist_id = m.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
         title = self._html_search_regex(
-            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
+            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
         description = self._html_search_regex(
             r'<p class="excerpt"[^>]*?>(.*?)</p>',
-            webpage, u'description', fatal=False)
+            webpage, 'description', fatal=False)
         urls = re.findall(
             r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
             webpage)

View File

@@ -15,8 +15,7 @@ from ..utils import (
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
-
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
     _TEST = {
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
@@ -29,9 +28,9 @@ class AddAnimeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
+        video_id = self._match_id(url)
+
         try:
-            mobj = re.match(self._VALID_URL, url)
-            video_id = mobj.group('video_id')
             webpage = self._download_webpage(url, video_id)
         except ExtractorError as ee:
             if not isinstance(ee.cause, compat_HTTPError) or \
@@ -49,7 +48,7 @@ class AddAnimeIE(InfoExtractor):
             r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
             redir_webpage)
         if av is None:
-            raise ExtractorError(u'Cannot find redirect math task')
+            raise ExtractorError('Cannot find redirect math task')
         av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
 
         parsed_url = compat_urllib_parse_urlparse(url)

View File

@@ -2,123 +2,147 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class AdultSwimIE(InfoExtractor): class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
_TEST = {
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', _TESTS = [{
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [ 'playlist': [
{ {
'md5': '4da359ec73b58df4575cd01a610ba5dc', 'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': { 'info_dict': {
'id': '8a250ba1450996e901453d7f02ca02f5', 'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv', 'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', 'title': 'Rick and Morty - Pilot Part 1',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
'uploader': 'Rick and Morty', },
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
}, },
{ {
'md5': 'ffbdf55af9331c509d95350bd0cc1819', 'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': { 'info_dict': {
'id': '8a250ba1450996e901453d7f4bd102f6', 'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv', 'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', 'title': 'Rick and Morty - Pilot Part 4',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', 'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
'uploader': 'Rick and Morty', },
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
}, },
],
'info_dict': {
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
}, {
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
'playlist': [
{ {
'md5': 'b92409635540304280b4b6c36bd14a0a', 'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': { 'info_dict': {
'id': '8a250ba1450996e901453d7fa73c02f7', 'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv', 'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', 'title': 'American Dad - Putting Francine Out of Business',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
'uploader': 'Rick and Morty', },
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'e8818891d60e47b29cd89d7b0278156d',
'info_dict': {
'id': '8a250ba1450996e901453d7fc8ba02f8',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
} }
] ],
} 'info_dict': {
+            'title': 'American Dad - Putting Francine Out of Business',
+            'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
+        },
+    }]

-    _video_extensions = {
-        '3500': 'flv',
-        '640': 'mp4',
-        '150': 'mp4',
-        'ipad': 'm3u8',
-        'iphone': 'm3u8'
-    }
-    _video_dimensions = {
-        '3500': (1280, 720),
-        '640': (480, 270),
-        '150': (320, 180)
-    }
+    @staticmethod
+    def find_video_info(collection, slug):
+        for video in collection.get('videos'):
+            if video.get('slug') == slug:
+                return video
+
+    @staticmethod
+    def find_collection_by_linkURL(collections, linkURL):
+        for collection in collections:
+            if collection.get('linkURL') == linkURL:
+                return collection
+
+    @staticmethod
+    def find_collection_containing_video(collections, slug):
+        for collection in collections:
+            for video in collection.get('videos'):
+                if video.get('slug') == slug:
+                    return collection, video

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_path = mobj.group('path')
+        show_path = mobj.group('show_path')
+        episode_path = mobj.group('episode_path')
+        is_playlist = True if mobj.group('is_playlist') else False

-        webpage = self._download_webpage(url, video_path)
-        episode_id = self._html_search_regex(
-            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
-            webpage, 'episode_id')
-        title = self._og_search_title(webpage)
+        webpage = self._download_webpage(url, episode_path)

-        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
-        idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
+        # Extract the value of `bootstrappedData` from the Javascript in the page.
+        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)

-        episode_el = idoc.find('.//episode')
-        show_title = episode_el.attrib.get('collectionTitle')
-        episode_title = episode_el.attrib.get('title')
-        thumbnail = episode_el.attrib.get('thumbnailUrl')
-        description = episode_el.find('./description').text.strip()
+        try:
+            bootstrappedData = json.loads(bootstrappedDataJS)
+        except ValueError as ve:
+            errmsg = '%s: Failed to parse JSON ' % episode_path
+            raise ExtractorError(errmsg, cause=ve)
+
+        # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
+        # NOTE: We are only downloading one video (the current one) not the playlist
+        if is_playlist:
+            collections = bootstrappedData['playlists']['collections']
+            collection = self.find_collection_by_linkURL(collections, show_path)
+            video_info = self.find_video_info(collection, episode_path)
+
+            show_title = video_info['showTitle']
+            segment_ids = [video_info['videoPlaybackID']]
+        else:
+            collections = bootstrappedData['show']['collections']
+            collection, video_info = self.find_collection_containing_video(collections, episode_path)
+
+            show = bootstrappedData['show']
+            show_title = show['title']
+            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
+
+        episode_id = video_info['id']
+        episode_title = video_info['title']
+        episode_description = video_info['description']
+        episode_duration = video_info.get('duration')

         entries = []
-        segment_els = episode_el.findall('./segments/segment')
-
-        for part_num, segment_el in enumerate(segment_els):
-            segment_id = segment_el.attrib.get('id')
-            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
-            thumbnail = segment_el.attrib.get('thumbnailUrl')
-            duration = segment_el.attrib.get('duration')
-
-            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
+        for part_num, segment_id in enumerate(segment_ids):
+            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
+
+            segment_title = '%s - %s' % (show_title, episode_title)
+            if len(segment_ids) > 1:
+                segment_title += ' Part %d' % (part_num + 1)

             idoc = self._download_xml(
                 segment_url, segment_title,
                 'Downloading segment information', 'Unable to download segment information')

+            segment_duration = idoc.find('.//trt').text.strip()
+
             formats = []
             file_els = idoc.findall('.//files/file')

             for file_el in file_els:
                 bitrate = file_el.attrib.get('bitrate')
-                type = file_el.attrib.get('type')
-                width, height = self._video_dimensions.get(bitrate, (None, None))
+                ftype = file_el.attrib.get('type')

                 formats.append({
-                    'format_id': '%s-%s' % (bitrate, type),
-                    'url': file_el.text,
-                    'ext': self._video_extensions.get(bitrate, 'mp4'),
+                    'format_id': '%s_%s' % (bitrate, ftype),
+                    'url': file_el.text.strip(),
                     # The bitrate may not be a number (for example: 'iphone')
                     'tbr': int(bitrate) if bitrate.isdigit() else None,
-                    'height': height,
-                    'width': width
+                    'quality': 1 if ftype == 'hd' else -1
                 })
             self._sort_formats(formats)

@@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):
                 'id': segment_id,
                 'title': segment_title,
                 'formats': formats,
-                'uploader': show_title,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'description': description
+                'duration': segment_duration,
+                'description': episode_description
             })

         return {
             '_type': 'playlist',
             'id': episode_id,
-            'display_id': video_path,
+            'display_id': episode_path,
             'entries': entries,
-            'title': '%s %s' % (show_title, episode_title),
-            'description': description,
-            'thumbnail': thumbnail
+            'title': '%s - %s' % (show_title, episode_title),
+            'description': episode_description,
+            'duration': episode_duration
         }
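
Note on the new extraction path: the episode metadata now comes from a `bootstrappedData` object embedded in the page's JavaScript rather than from the old asfix XML index. A minimal standalone sketch of that technique; the page snippet below is illustrative, not taken from adultswim.com:

import json
import re

# Hypothetical page snippet; real pages embed a much larger object.
page = '<script>var bootstrappedData = {"show": {"title": "American Dad"}};</script>'

# Grab everything between `var bootstrappedData = ` and the closing `;`,
# then parse it as JSON. A malformed page raises ValueError here, which
# the extractor wraps in an ExtractorError.
m = re.search(r'var bootstrappedData = ({.*});', page)
if m is None:
    raise ValueError('bootstrappedData not found')
data = json.loads(m.group(1))
print(data['show']['title'])  # -> American Dad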

View File

@@ -1,5 +1,4 @@
 # coding: utf-8
 from __future__ import unicode_literals
-
 import re

@@ -26,8 +25,7 @@ class AparatIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = self._match_id(url)

         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id

@@ -40,15 +38,15 @@ class AparatIE(InfoExtractor):
         for i, video_url in enumerate(video_urls):
             req = HEADRequest(video_url)
             res = self._request_webpage(
-                req, video_id, note=u'Testing video URL %d' % i, errnote=False)
+                req, video_id, note='Testing video URL %d' % i, errnote=False)
             if res:
                 break
         else:
-            raise ExtractorError(u'No working video URLs found')
+            raise ExtractorError('No working video URLs found')

-        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
+        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
         thumbnail = self._search_regex(
-            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
+            r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)

         return {
             'id': video_id,

View File

@@ -80,7 +80,7 @@ class AppleTrailersIE(InfoExtractor):
         def _clean_json(m):
             return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
         s = re.sub(self._JSON_RE, _clean_json, s)
-        s = '<html>' + s + u'</html>'
+        s = '<html>%s</html>' % s
         return s

     doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

View File

@@ -24,17 +24,17 @@ class AudiomackIE(InfoExtractor):
         },
         # hosted on soundcloud via audiomack
         {
+            'add_ie': ['Soundcloud'],
             'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
-            'file': '172419696.mp3',
-            'info_dict':
-            {
+            'info_dict': {
+                'id': '172419696',
                 'ext': 'mp3',
+                'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
                 'title': 'Young Thug ft Lil Wayne - Take Kare',
-                "upload_date": "20141016",
-                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
-                "uploader": "Young Thug World"
+                'uploader': 'Young Thug World',
+                'upload_date': '20141016',
             }
-        }
+        },
     ]

     def _real_extract(self, url):

View File

@@ -0,0 +1,93 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import float_or_none
class AzubuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
'info_dict': {
'id': '15575',
'ext': 'mp4',
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
'thumbnail': 're:^https?://.*\.jpe?g',
'timestamp': 1417523507.334,
'upload_date': '20141202',
'duration': 9988.7,
'uploader': 'GSL',
'uploader_id': 414310,
'view_count': int,
},
},
{
'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
'info_dict': {
'id': '9344',
'ext': 'mp4',
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
'thumbnail': 're:^https?://.*\.jpe?g',
'timestamp': 1410530893.320,
'upload_date': '20140912',
'duration': 172.385,
'uploader': 'FnaticTV',
'uploader_id': 272749,
'view_count': int,
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
title = data['title'].strip()
description = data['description']
thumbnail = data['thumbnail']
view_count = data['view_count']
uploader = data['user']['username']
uploader_id = data['user']['id']
stream_params = json.loads(data['stream_params'])
timestamp = float_or_none(stream_params['creationDate'], 1000)
duration = float_or_none(stream_params['length'], 1000)
renditions = stream_params.get('renditions') or []
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
if video:
renditions.append(video)
formats = [{
'url': fmt['url'],
'width': fmt['frameWidth'],
'height': fmt['frameHeight'],
'vbr': float_or_none(fmt['encodingRate'], 1000),
'filesize': fmt['size'],
'vcodec': fmt['videoCodec'],
'container': fmt['videoContainer'],
} for fmt in renditions if fmt['url']]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'view_count': view_count,
'formats': formats,
}
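
One detail of this extractor worth spelling out: `stream_params` is a JSON string nested inside the JSON API response, with timestamps and durations in milliseconds, hence the second `json.loads` and the `float_or_none(..., 1000)` scaling. A toy illustration with made-up values:

import json

api_response = json.loads(
    '{"data": {"stream_params": "{\\"creationDate\\": 1417523507334, \\"length\\": 9988700}"}}')

# The inner value is itself a JSON document, so it has to be parsed again.
stream_params = json.loads(api_response['data']['stream_params'])

# Millisecond values scaled down to seconds, as float_or_none(..., 1000) does.
print(stream_params['creationDate'] / 1000.0)  # 1417523507.334
print(stream_params['length'] / 1000.0)        # 9988.7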

View File

@@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
     _TEST = {
         'url': 'http://bambuser.com/v/4050584',
         # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
-        # u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
+        # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
         'info_dict': {
             'id': '4050584',
             'ext': 'flv',

View File

@@ -1,9 +1,10 @@
 from __future__ import unicode_literals

-import re
+import xml.etree.ElementTree

 from .subtitles import SubtitlesInfoExtractor
 from ..utils import ExtractorError
+from ..compat import compat_HTTPError


 class BBCCoUkIE(SubtitlesInfoExtractor):

@@ -55,7 +56,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 'skip_download': True,
             },
             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-        }
+        },
+        {
+            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+            'info_dict': {
+                'id': 'b03k3pb7',
+                'ext': 'flv',
+                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+                'description': '2. Invasion',
+                'duration': 3600,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+        },
     ]

     def _extract_asx_playlist(self, connection, programme_id):

@@ -102,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

     def _extract_medias(self, media_selection):
+        error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
+        if error is not None:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
         return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

     def _extract_connections(self, media):

@@ -158,54 +178,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             subtitles[lang] = srt
         return subtitles

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        group_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, group_id, 'Downloading video page')
-        if re.search(r'id="emp-error" class="notinuk">', webpage):
-            raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
-                                 expected=True)
-
-        playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
-                                      'Downloading playlist XML')
-
-        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
-        if no_items is not None:
-            reason = no_items.get('reason')
-            if reason == 'preAvailability':
-                msg = 'Episode %s is not yet available' % group_id
-            elif reason == 'postAvailability':
-                msg = 'Episode %s is no longer available' % group_id
-            else:
-                msg = 'Episode %s is not available: %s' % (group_id, reason)
-            raise ExtractorError(msg, expected=True)
-
-        formats = []
-        subtitles = None
-
-        for item in self._extract_items(playlist):
-            kind = item.get('kind')
-            if kind != 'programme' and kind != 'radioProgramme':
-                continue
-            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
-            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
-
-            programme_id = item.get('identifier')
-            duration = int(item.get('duration'))
-
-            media_selection = self._download_xml(
-                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
-                programme_id, 'Downloading media selection XML')
-
-            for media in self._extract_medias(media_selection):
-                kind = media.get('kind')
-                if kind == 'audio':
-                    formats.extend(self._extract_audio(media, programme_id))
-                elif kind == 'video':
-                    formats.extend(self._extract_video(media, programme_id))
-                elif kind == 'captions':
-                    subtitles = self._extract_captions(media, programme_id)
+    def _download_media_selector(self, programme_id):
+        try:
+            media_selection = self._download_xml(
+                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
+                programme_id, 'Downloading media selection XML')
+        except ExtractorError as ee:
+            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
+            else:
+                raise
+
+        formats = []
+        subtitles = None
+
+        for media in self._extract_medias(media_selection):
+            kind = media.get('kind')
+            if kind == 'audio':
+                formats.extend(self._extract_audio(media, programme_id))
+            elif kind == 'video':
+                formats.extend(self._extract_video(media, programme_id))
+            elif kind == 'captions':
+                subtitles = self._extract_captions(media, programme_id)
+
+        return formats, subtitles
+
+    def _real_extract(self, url):
+        group_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+        programme_id = self._search_regex(
+            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+        if programme_id:
+            player = self._download_json(
+                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
+                group_id)['jsConf']['player']
+            title = player['title']
+            description = player['subtitle']
+            duration = player['duration']
+            formats, subtitles = self._download_media_selector(programme_id)
+        else:
+            playlist = self._download_xml(
+                'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
+                group_id, 'Downloading playlist XML')
+
+            no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
+            if no_items is not None:
+                reason = no_items.get('reason')
+                if reason == 'preAvailability':
+                    msg = 'Episode %s is not yet available' % group_id
+                elif reason == 'postAvailability':
+                    msg = 'Episode %s is no longer available' % group_id
+                elif reason == 'noMedia':
+                    msg = 'Episode %s is not currently available' % group_id
+                else:
+                    msg = 'Episode %s is not available: %s' % (group_id, reason)
+                raise ExtractorError(msg, expected=True)
+
+            for item in self._extract_items(playlist):
+                kind = item.get('kind')
+                if kind != 'programme' and kind != 'radioProgramme':
+                    continue
+                title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
+                description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
+
+                programme_id = item.get('identifier')
+                duration = int(item.get('duration'))
+                formats, subtitles = self._download_media_selector(programme_id)

         if self._downloader.params.get('listsubtitles', False):
             self._list_available_subtitles(programme_id, subtitles)
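
The 403 branch above works because the mediaselector returns a well-formed XML error document in the body of the 403 response, so the extractor can parse it instead of failing. A rough standalone sketch of the same pattern using the Python 3 stdlib (the URL would be a placeholder; youtube-dl itself goes through compat_HTTPError):

import urllib.error
import urllib.request
import xml.etree.ElementTree as ET

def fetch_xml(url):
    # Some services return a parseable XML error document together with
    # a 403 status; read it out of the HTTPError instead of giving up.
    try:
        body = urllib.request.urlopen(url).read()
    except urllib.error.HTTPError as err:
        if err.code != 403:
            raise
        body = err.read()
    return ET.fromstring(body)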

youtube_dl/extractor/bet.py (new file)
View File

@@ -0,0 +1,108 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
xpath_text,
xpath_with_ns,
int_or_none,
parse_iso8601,
)
class BetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [
{
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': {
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv',
'title': 'BET News Presents: A Conversation With President Obama',
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
'duration': 1534,
'timestamp': 1418075340,
'upload_date': '20141208',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
'info_dict': {
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
'display_id': 'justice-for-ferguson-a-community-reacts',
'ext': 'flv',
'title': 'Justice for Ferguson: A Community Reacts',
'description': 'A BET News special.',
'duration': 1696,
'timestamp': 1416942360,
'upload_date': '20141125',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
media_url = compat_urllib_parse.unquote(self._search_regex(
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
mrss = self._download_xml(media_url, display_id)
item = mrss.find('./channel/item')
NS_MAP = {
'dc': 'http://purl.org/dc/elements/1.1/',
'media': 'http://search.yahoo.com/mrss/',
'ka': 'http://kickapps.com/karss',
}
title = xpath_text(item, './title', 'title')
description = xpath_text(
item, './description', 'description', fatal=False)
video_id = xpath_text(item, './guid', 'video id', fatal=False)
timestamp = parse_iso8601(xpath_text(
item, xpath_with_ns('./dc:date', NS_MAP),
'upload date', fatal=False))
uploader = xpath_text(
item, xpath_with_ns('./dc:creator', NS_MAP),
'uploader', fatal=False)
media_content = item.find(
xpath_with_ns('./media:content', NS_MAP))
duration = int_or_none(media_content.get('duration'))
smil_url = media_content.get('url')
thumbnail = media_content.find(
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
formats = self._extract_smil_formats(smil_url, display_id)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'uploader': uploader,
'duration': duration,
'formats': formats,
}
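
The MRSS lookups above rely on `xpath_with_ns`, which (as used here) simply expands a `prefix:tag` path into the `{uri}tag` form via the `NS_MAP` dict, since plain ElementTree paths do not resolve prefixes by themselves. A toy sketch of the same expansion; the document content is made up:

import xml.etree.ElementTree as ET

NS_MAP = {'media': 'http://search.yahoo.com/mrss/'}

mrss = ET.fromstring(
    '<rss xmlns:media="http://search.yahoo.com/mrss/">'
    '<channel><item>'
    '<media:content duration="1534" url="http://example.com/clip.smil"/>'
    '</item></channel></rss>'
)

item = mrss.find('./channel/item')
# Qualify the tag by hand with the namespace URI; that expansion is
# all that xpath_with_ns('./media:content', NS_MAP) does.
content = item.find('{%s}content' % NS_MAP['media'])
print(content.get('url'), content.get('duration'))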

View File

@@ -4,13 +4,17 @@ import re
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
-    compat_urllib_request,
-    unescapeHTML,
-    parse_iso8601,
-    compat_urlparse,
-    clean_html,
-    compat_str,
-)
+from ..compat import (
+    compat_str,
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
+    clean_html,
+    int_or_none,
+    parse_iso8601,
+    unescapeHTML,
+)

@@ -64,7 +68,39 @@ class BlipTVIE(SubtitlesInfoExtractor):
             'uploader': 'redvsblue',
             'uploader_id': '792887',
         }
-    }
+    },
+    {
+        'url': 'http://blip.tv/play/gbk766dkj4Yn',
+        'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
+        'info_dict': {
+            'id': '1749452',
+            'ext': 'mp4',
+            'upload_date': '20090208',
+            'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
+            'title': 'Nostalgia Critic: Transformers',
+            'timestamp': 1234068723,
+            'uploader': 'NostalgiaCritic',
+            'uploader_id': '246467',
+        }
+    },
+    {
+        # https://github.com/rg3/youtube-dl/pull/4404
+        'note': 'Audio only',
+        'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
+        'md5': '76c0a56f24e769ceaab21fbb6416a351',
+        'info_dict': {
+            'id': '7103299',
+            'ext': 'flv',
+            'title': 'Weekly Manga Recap: Kingdom',
+            'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
+            'timestamp': 1417660321,
+            'upload_date': '20141204',
+            'uploader': 'The Rollo T',
+            'uploader_id': '407429',
+            'duration': 7251,
+            'vcodec': 'none',
+        }
+    },
 ]

 def _real_extract(self, url):

@@ -74,11 +110,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
         # See https://github.com/rg3/youtube-dl/issues/857 and
         # https://github.com/rg3/youtube-dl/issues/4197
         if lookup_id:
-            info_page = self._download_webpage(
-                'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
-            video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
-        else:
-            video_id = mobj.group('id')
+            urlh = self._request_webpage(
+                'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
+            url = compat_urlparse.urlparse(urlh.geturl())
+            qs = compat_urlparse.parse_qs(url.query)
+            mobj = re.match(self._VALID_URL, qs['file'][0])
+        video_id = mobj.group('id')

         rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')

@@ -114,7 +152,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
             msg = self._download_webpage(
                 url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                 video_id, 'Resolving URL for %s' % role)
-            real_url = compat_urlparse.parse_qs(msg)['message'][0]
+            real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]

             media_type = media_content.get('type')
             if media_type == 'text/srt' or url.endswith('.srt'):

@@ -129,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
                     'url': real_url,
                     'format_id': role,
                     'format_note': media_type,
-                    'vcodec': media_content.get(blip('vcodec')),
+                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                     'acodec': media_content.get(blip('acodec')),
                     'filesize': media_content.get('filesize'),
-                    'width': int(media_content.get('width')),
-                    'height': int(media_content.get('height')),
+                    'width': int_or_none(media_content.get('width')),
+                    'height': int_or_none(media_content.get('height')),
                 })
         self._sort_formats(formats)

View File

@@ -14,7 +14,6 @@ class BreakIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
-        'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
         'info_dict': {
             'id': '2468056',
             'ext': 'mp4',

View File

@@ -265,6 +265,7 @@ class BrightcoveIE(InfoExtractor):
                 url = rend['defaultURL']
                 if not url:
                     continue
+                ext = None
                 if rend['remote']:
                     url_comp = compat_urllib_parse_urlparse(url)
                     if url_comp.path.endswith('.m3u8'):

@@ -276,7 +277,7 @@
                     # akamaihd.net, but they don't use f4m manifests
                     url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
                     ext = 'flv'
-                else:
+                if ext is None:
                     ext = determine_ext(url)
                 size = rend.get('size')
                 formats.append({

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
class BuzzFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
'info_dict': {
'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
'description': 'Rambro!',
},
'playlist': [{
'info_dict': {
'id': 'aVCR29aE_OQ',
'ext': 'mp4',
'upload_date': '20141024',
'uploader_id': 'Buddhanz1',
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
'uploader': 'Buddhanz',
'title': 'Angry Ram destroys a punching bag',
}
}]
}, {
'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
'params': {
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
'description': 'Munchkin the Teddy Bear is back !',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
'playlist': [{
'info_dict': {
'id': 'mVmBL8B-In0',
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
'uploader': 'Munchkin the Shih Tzu',
'title': 'Munchkin the Teddy Bear gets her exercise',
},
}]
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
all_buckets = re.findall(
r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
webpage)
entries = []
for bd_json in all_buckets:
bd = json.loads(bd_json)
video = bd.get('video') or bd.get('progload_video')
if not video:
continue
entries.append(self.url_result(video['url']))
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'entries': entries,
}

View File

@@ -45,4 +45,4 @@ class CBSIE(InfoExtractor):
         real_id = self._search_regex(
             r"video\.settings\.pid\s*=\s*'([^']+)';",
             webpage, 'real video ID')
-        return self.url_result(u'theplatform:%s' % real_id)
+        return self.url_result('theplatform:%s' % real_id)

View File

@@ -24,7 +24,7 @@ class ClipfishIE(InfoExtractor):
             'title': 'FIFA 14 - E3 2013 Trailer',
             'duration': 82,
         },
-        u'skip': 'Blocked in the US'
+        'skip': 'Blocked in the US'
     }

     def _real_extract(self, url):

@@ -34,7 +34,7 @@ class ClipfishIE(InfoExtractor):
         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                     (video_id, int(time.time())))
         doc = self._download_xml(
-            info_url, video_id, note=u'Downloading info page')
+            info_url, video_id, note='Downloading info page')
         title = doc.find('title').text
         video_url = doc.find('filename').text
         if video_url is None:

View File

@@ -13,6 +13,7 @@ import time
 import xml.etree.ElementTree

 from ..compat import (
+    compat_cookiejar,
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -157,8 +158,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos. _type "playlist" indicates multiple videos.
There must be a key "entries", which is a list or a PagedList object, each There must be a key "entries", which is a list, an iterable, or a PagedList
element of which is a valid dictionary under this specfication. object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above). semantics as videos (see above).
@@ -173,9 +174,10 @@ class InfoExtractor(object):
_type "url" indicates that the video must be extracted from another _type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is: location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract. "url" - the next URL to extract.
The key "ie_key" can be set to the class name (minus the trailing "IE",
Additionally, it may have properties believed to be identical to the e.g. "Youtube") if the extractor class is known in advance.
resolved entity, for example "title" if the title of the referred video is Additionally, the dictionary may have any properties of the resolved entity
known in advance, for example "title" if the title of the referred video is
known ahead of time. known ahead of time.
@@ -296,9 +298,11 @@ class InfoExtractor(object):
         content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
         return (content, urlh)

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -436,7 +440,7 @@ class InfoExtractor(object):
         return video_info

     @staticmethod
-    def playlist_result(entries, playlist_id=None, playlist_title=None):
+    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
         """Returns a playlist"""
         video_info = {'_type': 'playlist',
                       'entries': entries}
@@ -444,6 +448,8 @@ class InfoExtractor(object):
             video_info['id'] = playlist_id
         if playlist_title:
             video_info['title'] = playlist_title
+        if playlist_description:
+            video_info['description'] = playlist_description
         return video_info

     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -787,6 +793,49 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats

+    # TODO: improve extraction
+    def _extract_smil_formats(self, smil_url, video_id):
+        smil = self._download_xml(
+            smil_url, video_id, 'Downloading SMIL file',
+            'Unable to download SMIL file')
+
+        base = smil.find('./head/meta').get('base')
+
+        formats = []
+        rtmp_count = 0
+        for video in smil.findall('./body/switch/video'):
+            src = video.get('src')
+            if not src:
+                continue
+            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            proto = video.get('proto')
+            if not proto:
+                if base:
+                    if base.startswith('rtmp'):
+                        proto = 'rtmp'
+                    elif base.startswith('http'):
+                        proto = 'http'
+            ext = video.get('ext')
+            if proto == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+            elif proto == 'rtmp':
+                rtmp_count += 1
+                streamer = video.get('streamer') or base
+                formats.append({
+                    'url': streamer,
+                    'play_path': src,
+                    'ext': 'flv',
+                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                    'tbr': bitrate,
+                    'width': width,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
@@ -815,6 +864,12 @@ class InfoExtractor(object):
             self._downloader.report_warning(msg)
         return res

+    def _set_cookie(self, domain, name, value, expire_time=None):
+        cookie = compat_cookiejar.Cookie(
+            0, name, value, None, None, domain, None,
+            None, '/', True, False, expire_time, '', None, None, None)
+        self._downloader.cookiejar.set_cookie(cookie)
+

 class SearchInfoExtractor(InfoExtractor):
     """

View File

@@ -125,7 +125,7 @@ class EightTracksIE(InfoExtractor):
         info = {
             'id': compat_str(track_data['id']),
             'url': track_data['track_file_stream_url'],
-            'title': track_data['performer'] + u' - ' + track_data['name'],
+            'title': track_data['performer'] + ' - ' + track_data['name'],
             'raw_title': track_data['name'],
             'uploader_id': data['user']['login'],
             'ext': 'm4a',

View File

@@ -0,0 +1,48 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class FoxgayIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
_TEST = {
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
'md5': '80d72beab5d04e1655a56ad37afe6841',
'info_dict': {
'id': '2582',
'ext': 'mp4',
'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
'age_limit': 18,
'thumbnail': 're:https?://.*\.jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<title>(?P<title>.*?)</title>',
webpage, 'title', fatal=False)
description = self._html_search_regex(
r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
webpage, 'description', fatal=False)
# Find the URL for the iFrame which contains the actual video.
iframe = self._download_webpage(
self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
video_id)
video_url = self._html_search_regex(
r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
thumb_url = self._html_search_regex(
r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'description': description,
'thumbnail': thumb_url,
'age_limit': 18,
}

View File

@@ -0,0 +1,94 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
)
class FoxNewsIE(InfoExtractor):
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
'info_dict': {
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
'duration': 265,
'timestamp': 1304411491,
'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
'md5': '5846c64a1ea05ec78175421b8323e2df',
'info_dict': {
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action",
'duration': 292,
'timestamp': 1417662047,
'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
{
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
'only_matching': True,
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
item = video['channel']['item']
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}

View File

@@ -40,8 +40,6 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
         else:
             georestricted = False

-
-
         formats = []
         for video in info['videos']:
             if video['statut'] != 'ONLINE':

View File

@@ -11,7 +11,7 @@ class GamekingsIE(InfoExtractor):
         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
         # MD5 is flaky, seems to change regularly
         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
-        u'info_dict': {
+        'info_dict': {
             'id': '20130811',
             'ext': 'mp4',
             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',

View File

@@ -452,7 +452,23 @@ class GenericIE(InfoExtractor):
             'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
         },
         'playlist_mincount': 2,
+    },
+    # Direct link with incorrect MIME type
+    {
+        'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+        'md5': '4ccbebe5f36706d85221f204d7eb5913',
+        'info_dict': {
+            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+            'id': '5_Lennart_Poettering_-_Systemd',
+            'ext': 'webm',
+            'title': '5_Lennart_Poettering_-_Systemd',
+            'upload_date': '20141120',
+        },
+        'expected_warnings': [
+            'URL could be a direct video link, returning it as such.'
+        ]
     }
 ]

     def report_following_redirect(self, new_url):
@@ -606,10 +622,28 @@ class GenericIE(InfoExtractor):
         if not self._downloader.params.get('test', False) and not is_intentional:
             self._downloader.report_warning('Falling back on generic information extractor.')

-        if full_response:
-            webpage = self._webpage_read_content(full_response, url, video_id)
-        else:
-            webpage = self._download_webpage(url, video_id)
+        if not full_response:
+            full_response = self._request_webpage(url, video_id)
+
+        # Maybe it's a direct link to a video?
+        # Be careful not to download the whole thing!
+        first_bytes = full_response.read(512)
+        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+            self._downloader.report_warning(
+                'URL could be a direct video link, returning it as such.')
+            upload_date = unified_strdate(
+                head_response.headers.get('Last-Modified'))
+            return {
+                'id': video_id,
+                'title': os.path.splitext(url_basename(url))[0],
+                'direct': True,
+                'url': url,
+                'upload_date': upload_date,
+            }
+
+        webpage = self._webpage_read_content(
+            full_response, url, video_id, prefix=first_bytes)
+
         self.report_extraction(video_id)

         # Is it an RSS feed?
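
The direct-link detection above deliberately reads only the first 512 bytes and treats anything that does not start with optional whitespace plus `<` as non-HTML. A standalone sketch of that sniff, with an in-memory stream standing in for the HTTP response:

import io
import re

def looks_like_html(stream):
    # Read only a small prefix so a multi-gigabyte video is never
    # pulled down just to decide how to handle the URL.
    first_bytes = stream.read(512)
    return bool(re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')))

print(looks_like_html(io.BytesIO(b'  <!DOCTYPE html><html>...')))  # True
print(looks_like_html(io.BytesIO(b'\x1aE\xdf\xa3...webm...')))     # False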

View File

@@ -9,14 +9,15 @@ from ..utils import (
     determine_ext,
     compat_urllib_parse,
     compat_urllib_request,
+    int_or_none,
 )


 class GorillaVidIE(InfoExtractor):
-    IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
+    IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
     _VALID_URL = r'''(?x)
         https?://(?P<host>(?:www\.)?
-            (?:daclips\.in|gorillavid\.in|movpod\.in))/
+            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
         (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
     '''

@@ -49,6 +50,16 @@ class GorillaVidIE(InfoExtractor):
             'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
             'thumbnail': 're:http://.*\.jpg',
         }
+    }, {
+        # video with countdown timeout
+        'url': 'http://fastvideo.in/1qmdn1lmsmbw',
+        'md5': '8b87ec3f6564a3108a0e8e66594842ba',
+        'info_dict': {
+            'id': '1qmdn1lmsmbw',
+            'ext': 'mp4',
+            'title': 'Man of Steel - Trailer',
+            'thumbnail': 're:http://.*\.jpg',
+        },
     }, {
         'url': 'http://movpod.in/0wguyyxi1yca',
         'only_matching': True,

@@ -71,6 +82,12 @@ class GorillaVidIE(InfoExtractor):
         ''', webpage))

         if fields['op'] == 'download1':
+            countdown = int_or_none(self._search_regex(
+                r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+                webpage, 'countdown', default=None))
+            if countdown:
+                self._sleep(countdown, video_id)
+
             post = compat_urllib_parse.urlencode(fields)

             req = compat_urllib_request.Request(url, post)

@@ -78,9 +95,13 @@ class GorillaVidIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id, 'Downloading video page')

-        title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
-        video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
-        thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
+        title = self._search_regex(
+            r'style="z-index: [0-9]+;">([^<]+)</span>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
+        video_url = self._search_regex(
+            r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
+        thumbnail = self._search_regex(
+            r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)

         formats = [{
             'format_id': 'sd',
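
The countdown handling added above scrapes the wait time out of the page and sleeps before submitting the download form (youtube-dl's own `_sleep` additionally prints a message and honours test mode). A stripped-down sketch; the HTML snippet merely imitates the markup the regex targets:

import re
import time

page = '<span id="countdown_str">Wait <span id="cxc">5</span> seconds</span>'

m = re.search(
    r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
    page)
if m:
    countdown = int(m.group(1))
    print('waiting %d s before POSTing the form' % countdown)
    time.sleep(countdown)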

View File

@@ -1,12 +1,13 @@
 from __future__ import unicode_literals

-import re
 import base64

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     HEADRequest,
 )

@@ -16,25 +17,24 @@ class HotNewHipHopIE(InfoExtractor):
     _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
-        'file': '1435540.mp3',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
         'info_dict': {
+            'id': '1435540',
+            'ext': 'mp3',
             'title': 'Freddie Gibbs - Lay It Down'
         }
     }

     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-        webpage_src = self._download_webpage(url, video_id)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)

         video_url_base64 = self._search_regex(
-            r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
+            r'data-path="(.*?)"', webpage, 'video URL', default=None)

         if video_url_base64 is None:
             video_url = self._search_regex(
-                r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
+                r'"contentUrl" content="(.*?)"', webpage, 'content URL')
             return self.url_result(video_url, ie='Youtube')

         reqdata = compat_urllib_parse.urlencode([

@@ -59,11 +59,11 @@ class HotNewHipHopIE(InfoExtractor):
         if video_url.endswith('.html'):
             raise ExtractorError('Redirect failed')

-        video_title = self._og_search_title(webpage_src).strip()
+        video_title = self._og_search_title(webpage).strip()

         return {
             'id': video_id,
             'url': video_url,
             'title': video_title,
-            'thumbnail': self._og_search_thumbnail(webpage_src),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }

View File

@@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
parse_duration,
parse_filesize,
)
class MinhatecaIE(InfoExtractor):
_VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
_TEST = {
'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
'info_dict': {
'id': '125848331',
'ext': 'mp4',
'title': 'youtube-dl test video',
'thumbnail': 're:^https?://.*\.jpg$',
'filesize_approx': 1530000,
'duration': 9,
'view_count': int,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
token = self._html_search_regex(
r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
webpage, 'request token')
token_data = [
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
data = self._download_json(
req, video_id, note='Downloading metadata')
video_url = data['redirectUrl']
title_str = self._html_search_regex(
r'<h1.*?>(.*?)</h1>', webpage, 'title')
title, _, ext = title_str.rpartition('.')
filesize_approx = parse_filesize(self._html_search_regex(
r'<p class="fileSize">(.*?)</p>',
webpage, 'file size approximation', fatal=False))
duration = parse_duration(self._html_search_regex(
r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<p class="downloadsCounter">([0-9]+)</p>',
webpage, 'view count', fatal=False))
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': ext,
'filesize_approx': filesize_approx,
'duration': duration,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),
}
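
The download endpoint here expects the page's anti-forgery token echoed back in a form-encoded POST and answers with JSON carrying the real file URL. A rough stdlib equivalent of that exchange (the token value is a placeholder, and the network call is left commented out):

import json
import urllib.parse
import urllib.request

token = 'dummy-token-scraped-from-the-page'
payload = urllib.parse.urlencode([
    ('fileId', '125848331'),
    ('__RequestVerificationToken', token),
]).encode('ascii')

req = urllib.request.Request(
    'http://minhateca.com.br/action/License/Download', data=payload,
    headers={'Content-Type': 'application/x-www-form-urlencoded'})
# The endpoint answers with JSON containing the actual file URL:
# data = json.loads(urllib.request.urlopen(req).read().decode('utf-8'))
# print(data['redirectUrl'])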

View File

@@ -49,7 +49,7 @@ class MooshareIE(InfoExtractor):
         page = self._download_webpage(url, video_id, 'Downloading page')

         if re.search(r'>Video Not Found or Deleted<', page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

         hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
         title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

View File

@@ -164,7 +164,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
-                webpage, u'mgid')
+                webpage, 'mgid')

         return self._get_videos_info(mgid)

View File

@@ -1,64 +1,65 @@
-import re
-import json
+from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    determine_ext,
 )


 class MuzuTVIE(InfoExtractor):
     _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
-    IE_NAME = u'muzu.tv'
+    IE_NAME = 'muzu.tv'

     _TEST = {
-        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
-        u'file': u'1981454.mp4',
-        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
-        u'info_dict': {
-            u'title': u'Cat Walk (Original Mix)',
-            u'description': u'md5:90e868994de201b2570e4e5854e19420',
-            u'uploader': u'MarcAshken featuring SOS',
+        'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
+        'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
+        'info_dict': {
+            'id': '1981454',
+            'ext': 'mp4',
+            'title': 'Cat Walk (Original Mix)',
+            'description': 'md5:90e868994de201b2570e4e5854e19420',
+            'uploader': 'MarcAshken featuring SOS',
         },
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

-        info_data = compat_urllib_parse.urlencode({'format': 'json',
-                                                   'url': url,
-                                                   })
-        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
-                                                 video_id, u'Downloading video info')
-        info = json.loads(video_info_page)
+        info_data = compat_urllib_parse.urlencode({
+            'format': 'json',
+            'url': url,
+        })
+        info = self._download_json(
+            'http://www.muzu.tv/api/oembed/?%s' % info_data,
+            video_id, 'Downloading video info')

-        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
-                                                  video_id, u'Downloading player info')
-        video_info = json.loads(player_info_page)['videos'][0]
+        player_info = self._download_json(
+            'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
+            video_id, 'Downloading player info')
+        video_info = player_info['videos'][0]
         for quality in ['1080', '720', '480', '360']:
             if video_info.get('v%s' % quality):
                 break

-        data = compat_urllib_parse.urlencode({'ai': video_id,
-                                              # Even if each time you watch a video the hash changes,
-                                              # it seems to work for different videos, and it will work
-                                              # even if you use any non empty string as a hash
-                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
-                                              'device': 'web',
-                                              'qv': quality,
-                                              })
-        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
-                                                video_id, u'Downloading video url')
-        video_url_info = json.loads(video_url_page)
+        data = compat_urllib_parse.urlencode({
+            'ai': video_id,
+            # Even if each time you watch a video the hash changes,
+            # it seems to work for different videos, and it will work
+            # even if you use any non empty string as a hash
+            'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
+            'device': 'web',
+            'qv': quality,
+        })
+        video_url_info = self._download_json(
+            'http://player.muzu.tv/player/requestVideo?%s' % data,
+            video_id, 'Downloading video url')

         video_url = video_url_info['url']

-        return {'id': video_id,
-                'title': info['title'],
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'thumbnail': info['thumbnail_url'],
-                'description': info['description'],
-                'uploader': info['author_name'],
-                }
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info['thumbnail_url'],
+            'description': info['description'],
+            'uploader': info['author_name'],
+        }

View File

@@ -1,3 +1,4 @@
# encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@@ -7,6 +8,7 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
) )
from ..utils import ExtractorError
class MySpaceIE(InfoExtractor): class MySpaceIE(InfoExtractor):
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689', 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'info_dict': { 'info_dict': {
'id': '100008689', 'id': '109594919',
'ext': 'flv', 'ext': 'flv',
'title': 'Viva La Vida', 'title': 'Little Big Town',
'description': 'The official Viva La Vida video, directed by Hype Williams', 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Coldplay', 'uploader': 'Five Minutes to the Stage',
'uploader_id': 'coldplay', 'uploader_id': 'fiveminutestothestage',
}, },
'params': { 'params': {
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, },
# song # songs
{ {
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242', 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'info_dict': { 'info_dict': {
'id': '39008454', 'id': '93388656',
'ext': 'flv', 'ext': 'flv',
'title': 'Darkness In My Heart', 'title': 'Of weakened soul...',
'uploader_id': 'spiderbags', 'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
}, },
'params': { 'params': {
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
}, {
'add_ie': ['Vevo'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
'id': 'USZM20600099',
'ext': 'mp4',
'title': 'Animal I Have Become',
'uploader': 'Three Days Grace',
'timestamp': int,
'upload_date': '20060502',
},
'skip': 'VEVO is only available in some countries',
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'info_dict': {
'id': 'ypWvQgnJrSU',
'ext': 'mp4',
'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
'uploader': 'Jacob Soren',
'uploader_id': 'SorenPromotions',
'upload_date': '20140725',
}
}, },
] ]
@@ -48,16 +75,41 @@ class MySpaceIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'playerSwf":"([^"?]*)', webpage, 'player URL')
if mobj.group('mediatype').startswith('music/song'): if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable # songs don't store any useful info in the 'context' variable
song_data = self._search_regex(
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
webpage, 'song_data', default=None, group=0)
if song_data is None:
# some songs in an album are not playable
self.report_warning(
'%s: No downloadable song on this page' % video_id)
return
def search_data(name): def search_data(name):
return self._search_regex( return self._search_regex(
r'data-%s="(.*?)"' % name, webpage, name) r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data')
streamUrl = search_data('stream-url') streamUrl = search_data('stream-url')
if not streamUrl:
vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id')
if vevo_id:
self.to_screen('Vevo video detected: %s' % vevo_id)
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
elif youtube_id:
self.to_screen('Youtube video detected: %s' % youtube_id)
return self.url_result(youtube_id, ie='Youtube')
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
info = { info = {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'), 'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
} }
@@ -79,6 +131,50 @@ class MySpaceIE(InfoExtractor):
info.update({ info.update({
'url': rtmp_url, 'url': rtmp_url,
'play_path': play_path, 'play_path': play_path,
'player_url': player_url,
'ext': 'flv', 'ext': 'flv',
}) })
return info return info
+
+
+class MySpaceAlbumIE(InfoExtractor):
+    IE_NAME = 'MySpace:album'
+    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
+        'info_dict': {
+            'title': 'Transmissions',
+            'id': '19455773',
+        },
+        'playlist_count': 14,
+        'skip': 'this album is only available in some countries',
+    }, {
+        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
+        'info_dict': {
+            'title': 'The Demo',
+            'id': '18596029',
+        },
+        'playlist_count': 5,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        display_id = mobj.group('title') + playlist_id
+        webpage = self._download_webpage(url, display_id)
+        tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
+        if not tracks_paths:
+            raise ExtractorError(
+                '%s: No songs found, try using proxy' % display_id,
+                expected=True)
+        entries = [
+            self.url_result(t_path, ie=MySpaceIE.ie_key())
+            for t_path in tracks_paths]
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'display_id': display_id,
+            'title': self._og_search_title(webpage),
+            'entries': entries,
+        }
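
The quote handling in the new search_data() is the interesting detail: the regex captures the opening quote of the data-* attribute and backreferences it, so values delimited with either ' or " are matched, and the search is confined to the matched <button> markup rather than the whole page. A minimal standalone sketch of that pattern, run against a hypothetical HTML snippet:

    import re

    def search_data(html, name):
        # \1 backreferences the opening quote, so both ' and " delimiters work
        m = re.search(r'''data-%s=(["\'])(?P<data>.*?)\1''' % re.escape(name), html)
        return m.group('data') if m else ''

    sample = '<button data-song-id="93388656" data-stream-url=\'rtmp://example.invalid/s\'>'
    assert search_data(sample, 'song-id') == '93388656'
    assert search_data(sample, 'stream-url') == 'rtmp://example.invalid/s'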

View File

@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MyVidsterIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+
+    _TEST = {
+        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
+        'md5': '95296d0231c1363222c3441af62dc4ca',
+        'info_dict': {
+            'id': '3685814',
+            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
+            'upload_date': '20141027',
+            'uploader_id': 'utkualp',
+            'ext': 'mp4',
+            'age_limit': 18,
+        },
+        'add_ie': ['XHamster'],
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        return self.url_result(self._html_search_regex(
+            r'rel="videolink" href="(?P<real_url>.*)">',
+            webpage, 'real video url'))

View File

@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     remove_end,
@@ -10,8 +8,8 @@ from ..utils import (
 class NBAIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
-    _TEST = {
+    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
+    _TESTS = [{
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
         'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
         'info_dict': {
@@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
             'duration': 181,
         },
-    }
+    }, {
+        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
@@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
         description = self._og_search_description(webpage)
         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration', fatal=False))
+            self._html_search_meta('duration', webpage, 'duration'))
 
         return {
             'id': shortened_video_id,
View File

@@ -2,11 +2,15 @@ from __future__ import unicode_literals
 import re
 import json
+import os
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_parse,
+    compat_urllib_parse_urlparse
+)
+from ..utils import (
     unified_strdate,
 )
@@ -22,9 +26,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
         initial_video_url = info['publishPoint']
         if info['formats'] == '1':
+            parsed_url = compat_urllib_parse_urlparse(initial_video_url)
+            filename, ext = os.path.splitext(parsed_url.path)
+            path = '%s_sd%s' % (filename, ext)
             data = compat_urllib_parse.urlencode({
                 'type': 'fvod',
-                'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+                'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
             })
             path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
             path_doc = self._download_xml(
@@ -71,6 +78,17 @@ class NHLIE(NHLBaseInfoExtractor):
             'duration': 0,
             'upload_date': '20141011',
         },
+    }, {
+        'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
+        'md5': 'c78fc64ea01777e426cfc202b746c825',
+        'info_dict': {
+            'id': '58665',
+            'ext': 'flv',
+            'title': 'Classic Game In Six - April 22, 1979',
+            'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
+            'duration': 400,
+            'upload_date': '20100129'
+        },
     }, {
         'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
         'only_matching': True,
@@ -88,7 +106,7 @@ class NHLIE(NHLBaseInfoExtractor):
 class NHLVideocenterIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com:videocenter'
     IE_DESC = 'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
     _TEST = {
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
         'info_dict': {
@@ -122,7 +140,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
         response = self._download_webpage(request_url, playlist_title)
         response = self._fix_json(response)
         if not response.strip():
-            self._downloader.report_warning(u'Got an empty reponse, trying '
+            self._downloader.report_warning('Got an empty reponse, trying '
                                             'adding the "newvideos" parameter')
             response = self._download_webpage(request_url + '&newvideos=true',
                                               playlist_title)
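
The path juggling in NHLBaseInfoExtractor matches the commit message for #4397: a plain .replace('.mp4', '_sd.mp4') would hit any earlier '.mp4' in the path, so the code now splits the real extension off the parsed URL path. A Python 3 sketch of the same technique, using a hypothetical URL:

    import os.path
    from urllib.parse import urlparse, urlunparse

    def sd_path(video_url):
        # insert '_sd' before the actual extension, not at the first '.mp4' match
        parsed = urlparse(video_url)
        filename, ext = os.path.splitext(parsed.path)
        return urlunparse(parsed[:2] + ('%s_sd%s' % (filename, ext),) + parsed[3:])

    # the '.mp4' inside the directory name is what broke the old str.replace approach
    print(sd_path('http://example.invalid/video.mp4.assets/clip.mp4'))
    # -> http://example.invalid/video.mp4.assets/clip_sd.mp4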

View File

@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
     _LOGIN_URL = 'http://noco.tv/do.php'
     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
+    _SUB_LANG_TEMPLATE = '&sub_lang=%s'
     _NETRC_MACHINE = 'noco'
 
     _TEST = {
@@ -60,10 +61,12 @@
         if 'erreur' in login:
             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
 
-    def _call_api(self, path, video_id, note):
+    def _call_api(self, path, video_id, note, sub_lang=None):
         ts = compat_str(int(time.time() * 1000))
         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
         url = self._API_URL_TEMPLATE % (path, ts, tk)
+        if sub_lang:
+            url += self._SUB_LANG_TEMPLATE % sub_lang
 
         resp = self._download_json(url, video_id, note)
@@ -91,31 +94,34 @@
         formats = []
 
-        for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
-            video = self._call_api(
-                'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
-                video_id, 'Downloading %s video JSON' % format_id)
+        for lang, lang_dict in medias['fr']['video_list'].items():
+            for format_id, fmt in lang_dict['quality_list'].items():
+                format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
+                video = self._call_api(
+                    'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
+                    video_id, 'Downloading %s video JSON' % format_id_extended,
+                    lang if lang != 'none' else None)
 
-            file_url = video['file']
-            if not file_url:
-                continue
+                file_url = video['file']
+                if not file_url:
+                    continue
 
-            if file_url in ['forbidden', 'not found']:
-                popmessage = video['popmessage']
-                self._raise_error(popmessage['title'], popmessage['message'])
+                if file_url in ['forbidden', 'not found']:
+                    popmessage = video['popmessage']
+                    self._raise_error(popmessage['title'], popmessage['message'])
 
-            formats.append({
-                'url': file_url,
-                'format_id': format_id,
-                'width': fmt['res_width'],
-                'height': fmt['res_lines'],
-                'abr': fmt['audiobitrate'],
-                'vbr': fmt['videobitrate'],
-                'filesize': fmt['filesize'],
-                'format_note': qualities[format_id]['quality_name'],
-                'preference': qualities[format_id]['priority'],
-            })
+                formats.append({
+                    'url': file_url,
+                    'format_id': format_id_extended,
+                    'width': fmt['res_width'],
+                    'height': fmt['res_lines'],
+                    'abr': fmt['audiobitrate'],
+                    'vbr': fmt['videobitrate'],
+                    'filesize': fmt['filesize'],
+                    'format_note': qualities[format_id]['quality_name'],
+                    'preference': qualities[format_id]['priority'],
+                })
 
         self._sort_formats(formats)
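
The API signing in _call_api is unchanged by this diff: ts is the current time in milliseconds and tk is md5(md5(ts) + salt), with the salt string visible above. A self-contained sketch of the scheme (the path is a placeholder):

    import hashlib
    import time

    def noco_api_url(path, salt='#8S?uCraTedap6a'):
        # tk = md5(md5(ts) + salt), exactly the computation in _call_api
        ts = str(int(time.time() * 1000))
        tk = hashlib.md5(
            (hashlib.md5(ts.encode('ascii')).hexdigest() + salt).encode('ascii')
        ).hexdigest()
        return 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' % (path, ts, tk)

    print(noco_api_url('shows/12345/medias'))  # '12345' is a placeholder id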

View File

@@ -4,6 +4,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    clean_html,
     compat_urllib_parse,
 )
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
+        m_error = re.search(
+            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
+        if m_error:
+            raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
+
         video_title = None
         duration = None
         video_thumbnail = None

View File

@@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Im Interview: Kai Wiesinger',
             'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
-            'upload_date': '20140225',
+            'upload_date': '20140203',
             'duration': 522.56,
         },
         'params': {
@@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
             'description': 'md5:2669cde3febe9bce13904f701e774eb6',
-            'upload_date': '20140225',
+            'upload_date': '20141014',
             'duration': 2410.44,
         },
         'params': {
@@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+            'info_dict': {
+                'id': '439664',
+                'title': 'Episode 8 - Ganze Folge - Playlist',
+                'description': 'md5:63b8963e71f481782aeea877658dec84',
+            },
+            'playlist_count': 2,
+        },
     ]
 
     _CLIPID_REGEXES = [
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
         r'clip[iI]d=(\d+)',
+        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
     ]
     _TITLE_REGEXES = [
         r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
         r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
     ]
+    _PAGE_TYPE_REGEXES = [
+        r'<meta name="page_type" content="([^"]+)">',
+        r"'itemType'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_ID_REGEXES = [
+        r'content[iI]d=(\d+)',
+        r"'itemId'\s*:\s*'([^']*)'",
+    ]
+    _PLAYLIST_CLIP_REGEXES = [
+        r'(?s)data-qvt=.+?<a href="([^"]+)"',
+    ]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
+    def _extract_clip(self, url, webpage):
         clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
 
         access_token = 'testclient'
@@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+    def _extract_playlist(self, url, webpage):
+        playlist_id = self._html_search_regex(
+            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+        for regex in self._PLAYLIST_CLIP_REGEXES:
+            playlist_clips = re.findall(regex, webpage)
+            if playlist_clips:
+                title = self._html_search_regex(
+                    self._TITLE_REGEXES, webpage, 'title')
+                description = self._html_search_regex(
+                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
+                entries = [
+                    self.url_result(
+                        re.match('(.+?//.+?)/', url).group(1) + clip_path,
+                        'ProSiebenSat1')
+                    for clip_path in playlist_clips]
+                return self.playlist_result(entries, playlist_id, title, description)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        page_type = self._search_regex(
+            self._PAGE_TYPE_REGEXES, webpage,
+            'page type', default='clip').lower()
+        if page_type == 'clip':
+            return self._extract_clip(url, webpage)
+        elif page_type == 'playlist':
+            return self._extract_playlist(url, webpage)
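
The new _real_extract dispatches on a page-type marker before choosing between single-clip and playlist extraction, falling back to 'clip' when no marker is found. A toy version of that dispatch, with hypothetical page snippets:

    import re

    CLIP_PAGE = '<meta name="page_type" content="video_detail_page">'
    PLAYLIST_PAGE = '<meta name="page_type" content="playlist">'

    def page_type(webpage):
        # same shape as _PAGE_TYPE_REGEXES with default='clip'
        m = re.search(r'<meta name="page_type" content="([^"]+)">', webpage)
        return m.group(1).lower() if m else 'clip'

    for page in (CLIP_PAGE, PLAYLIST_PAGE, '<html></html>'):
        print('playlist' if page_type(page) == 'playlist' else 'clip')
    # -> clip, playlist, clip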

View File

@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+    IE_NAME = 'radio.de'
+    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+    _TEST = {
+        'url': 'http://ndr2.radio.de/',
+        'md5': '3b4cdd011bc59174596b6145cda474a4',
+        'info_dict': {
+            'id': 'ndr2',
+            'ext': 'mp3',
+            'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:591c49c702db1a33751625ebfb67f273',
+            'thumbnail': 're:^https?://.*\.png',
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        radio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, radio_id)
+
+        broadcast = json.loads(self._search_regex(
+            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
+            webpage, 'broadcast'))
+
+        title = self._live_title(broadcast['name'])
+        description = broadcast.get('description') or broadcast.get('shortDescription')
+        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+
+        formats = [{
+            'url': stream['streamUrl'],
+            'ext': stream['streamContentFormat'].lower(),
+            'acodec': stream['streamContentFormat'],
+            'abr': stream['bitRate'],
+            'asr': stream['sampleRate']
+        } for stream in broadcast['streamUrls']]
+        self._sort_formats(formats)
+
+        return {
+            'id': radio_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'is_live': True,
+            'formats': formats,
+        }

View File

@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
@@ -9,32 +7,23 @@ class RedTubeIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
-        'file': '66418.mp4',
-        # md5 varies from time to time, as in
-        # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
-        #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
+            'id': '66418',
+            'ext': 'mp4',
             "title": "Sucked on a toilet",
             "age_limit": 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        video_extension = 'mp4'
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
         video_url = self._html_search_regex(
-            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
+            r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
 
         video_title = self._html_search_regex(
             r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
-            webpage, u'title')
+            webpage, 'title')
 
         video_thumbnail = self._og_search_thumbnail(webpage)
 
         # No self-labeling, but they describe themselves as
@@ -44,7 +33,7 @@ class RedTubeIE(InfoExtractor):
         return {
             'id': video_id,
             'url': video_url,
-            'ext': video_extension,
+            'ext': 'mp4',
             'title': video_title,
             'thumbnail': video_thumbnail,
             'age_limit': age_limit,

View File

@@ -15,7 +15,7 @@ from ..utils import (
 class RTSIE(InfoExtractor):
     IE_DESC = 'RTS.ch'
-    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
+    _VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
 
     _TESTS = [
         {
@@ -23,6 +23,7 @@ class RTSIE(InfoExtractor):
             'md5': '753b877968ad8afaeddccc374d4256a5',
             'info_dict': {
                 'id': '3449373',
+                'display_id': 'les-enfants-terribles',
                 'ext': 'mp4',
                 'duration': 1488,
                 'title': 'Les Enfants Terribles',
@@ -30,7 +31,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Divers',
                 'upload_date': '19680921',
                 'timestamp': -40280400,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -38,6 +40,7 @@ class RTSIE(InfoExtractor):
             'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
             'info_dict': {
                 'id': '5624067',
+                'display_id': 'entre-ciel-et-mer',
                 'ext': 'mp4',
                 'duration': 3720,
                 'title': 'Les yeux dans les cieux - Mon homard au Canada',
@@ -45,7 +48,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Passe-moi les jumelles',
                 'upload_date': '20140404',
                 'timestamp': 1396635300,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -53,6 +57,7 @@ class RTSIE(InfoExtractor):
             'md5': 'b4326fecd3eb64a458ba73c73e91299d',
             'info_dict': {
                 'id': '5745975',
+                'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
                 'ext': 'mp4',
                 'duration': 48,
                 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
@@ -60,7 +65,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Hockey',
                 'upload_date': '20140403',
                 'timestamp': 1396556882,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
             'skip': 'Blocked outside Switzerland',
         },
@@ -69,6 +75,7 @@ class RTSIE(InfoExtractor):
             'md5': '9bb06503773c07ce83d3cbd793cebb91',
             'info_dict': {
                 'id': '5745356',
+                'display_id': 'londres-cachee-par-un-epais-smog',
                 'ext': 'mp4',
                 'duration': 33,
                 'title': 'Londres cachée par un épais smog',
@@ -76,7 +83,8 @@ class RTSIE(InfoExtractor):
                 'uploader': 'Le Journal en continu',
                 'upload_date': '20140403',
                 'timestamp': 1396537322,
-                'thumbnail': 're:^https?://.*\.image'
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
             },
         },
         {
@@ -84,6 +92,7 @@ class RTSIE(InfoExtractor):
             'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
             'info_dict': {
                 'id': '5706148',
+                'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
                 'ext': 'mp3',
                 'duration': 123,
                 'title': '"Urban Hippie", de Damien Krisl',
@@ -92,22 +101,44 @@ class RTSIE(InfoExtractor):
                 'timestamp': 1396551600,
             },
         },
+        {
+            'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
+            'md5': '968777c8779e5aa2434be96c54e19743',
+            'info_dict': {
+                'id': '6348260',
+                'display_id': 'le-19h30',
+                'ext': 'mp4',
+                'duration': 1796,
+                'title': 'Le 19h30',
+                'description': '',
+                'uploader': 'Le 19h30',
+                'upload_date': '20141201',
+                'timestamp': 1417458600,
+                'thumbnail': 're:^https?://.*\.image',
+                'view_count': int,
+            },
+        },
+        {
+            'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = m.group('id') or m.group('id_new')
+        display_id = m.group('display_id') or m.group('display_id_new')
 
         def download_json(internal_id):
             return self._download_json(
                 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
-                video_id)
+                display_id)
 
         all_info = download_json(video_id)
 
         # video_id extracted out of URL is not always a real id
         if 'video' not in all_info and 'audio' not in all_info:
-            page = self._download_webpage(url, video_id)
+            page = self._download_webpage(url, display_id)
             internal_id = self._html_search_regex(
                 r'<(?:video|audio) data-id="([0-9]+)"', page,
                 'internal video id')
@@ -143,6 +174,7 @@ class RTSIE(InfoExtractor):
         return {
             'id': video_id,
+            'display_id': display_id,
             'formats': formats,
             'title': info['title'],
             'description': info.get('intro'),

View File

@@ -53,6 +53,7 @@ class RutubeIE(InfoExtractor):
         m3u8_url = options['video_balancer'].get('m3u8')
         if m3u8_url is None:
             raise ExtractorError('Couldn\'t find m3u8 manifest url')
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
 
         return {
             'id': video['id'],
@@ -60,8 +61,7 @@
             'description': video['description'],
             'duration': video['duration'],
             'view_count': video['hits'],
-            'url': m3u8_url,
-            'ext': 'mp4',
+            'formats': formats,
             'thumbnail': video['thumbnail_url'],
             'uploader': author.get('name'),
             'uploader_id': compat_str(author['id']) if author else None,

View File

@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
         ext = info['jsplayer']['video_extension']
         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
         description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
             'description', fatal=False)
 
         return {

View File

@@ -1,4 +1,5 @@
 # encoding: utf-8
+from __future__ import unicode_literals
 
 import json
 import re
@@ -11,13 +12,14 @@ class SohuIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
 
     _TEST = {
-        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
-        u'file': u'382479172.mp4',
-        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
-        u'info_dict': {
-            u'title': u'MVFar East Movement《The Illest》',
+        'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        'info_dict': {
+            'id': '382479172',
+            'ext': 'mp4',
+            'title': 'MVFar East Movement《The Illest》',
         },
-        u'skip': u'Only available from China',
+        'skip': 'Only available from China',
     }
 
     def _real_extract(self, url):
@@ -26,11 +28,11 @@ class SohuIE(InfoExtractor):
             if mytv:
                 base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
             else:
-                base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+                base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
             data_url = base_data_url + str(vid_id)
             data_json = self._download_webpage(
                 data_url, video_id,
-                note=u'Downloading JSON data for ' + str(vid_id))
+                note='Downloading JSON data for ' + str(vid_id))
             return json.loads(data_json)
 
         mobj = re.match(self._VALID_URL, url)
@@ -39,11 +41,11 @@ class SohuIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
-                                            webpage, u'video title')
+                                            webpage, 'video title')
         title = raw_title.partition('-')[0].strip()
 
         vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
-                                      u'video path')
+                                      'video path')
         data = _fetch_data(vid, mytv)
 
         QUALITIES = ('ori', 'super', 'high', 'nor')
@@ -51,7 +53,7 @@ class SohuIE(InfoExtractor):
                    for q in QUALITIES
                    if data['data'][q + 'Vid'] != 0]
         if not vid_ids:
-            raise ExtractorError(u'No formats available for this video')
+            raise ExtractorError('No formats available for this video')
 
         # For now, we just pick the highest available quality
         vid_id = vid_ids[-1]
@@ -69,7 +71,7 @@ class SohuIE(InfoExtractor):
                         (allot, prot, clipsURL[i], su[i]))
             part_str = self._download_webpage(
                 part_url, video_id,
-                note=u'Downloading part %d of %d' % (i + 1, part_count))
+                note='Downloading part %d of %d' % (i + 1, part_count))
 
             part_info = part_str.split('|')
             video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])

View File

@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
                             (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
-                         (?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
+                         (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''

View File

@@ -33,5 +33,6 @@ class SpaceIE(InfoExtractor):
         # Other videos works fine with the info from the object
         brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
         if brightcove_url is None:
-            raise ExtractorError(u'The webpage does not contain a video', expected=True)
+            raise ExtractorError(
+                'The webpage does not contain a video', expected=True)
         return self.url_result(brightcove_url, BrightcoveIE.ie_key())

View File

@@ -1,7 +1,8 @@
+from __future__ import unicode_literals
+
 from .common import InfoExtractor
 
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     ExtractorError,
 )
@@ -17,10 +18,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
         sub_lang_list = self._get_available_subtitles(video_id, webpage)
         auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
         sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen(u'%s: Available subtitles for video: %s' %
+        self.to_screen('%s: Available subtitles for video: %s' %
                        (video_id, sub_lang))
         auto_lang = ",".join(auto_captions_list.keys())
-        self.to_screen(u'%s: Available automatic captions for video: %s' %
+        self.to_screen('%s: Available automatic captions for video: %s' %
                        (video_id, auto_lang))
 
     def extract_subtitles(self, video_id, webpage):
@@ -51,7 +52,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
         sub_lang_list = {}
         for sub_lang in requested_langs:
             if sub_lang not in available_subs_list:
-                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
                 continue
             sub_lang_list[sub_lang] = available_subs_list[sub_lang]
@@ -70,10 +71,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
         try:
             sub = self._download_subtitle_url(sub_lang, url)
         except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+            self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
             return
         if not sub:
-            self._downloader.report_warning(u'Did not fetch video subtitles')
+            self._downloader.report_warning('Did not fetch video subtitles')
             return
         return sub
@@ -94,5 +95,5 @@ class SubtitlesInfoExtractor(InfoExtractor):
         Must be redefined by the subclasses that support automatic captions,
         otherwise it will return {}
         """
-        self._downloader.report_warning(u'Automatic Captions not supported by this server')
+        self._downloader.report_warning('Automatic Captions not supported by this server')
         return {}

View File

@@ -4,10 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import parse_filesize
 
 
 class TagesschauIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
 
     _TESTS = [{
         'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
@@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):
             'description': 'md5:69da3c61275b426426d711bde96463ab',
             'thumbnail': 're:^http:.*\.jpg$',
         },
+    }, {
+        'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
+        'md5': '3c54c1f6243d279b706bde660ceec633',
+        'info_dict': {
+            'id': '5727',
+            'ext': 'mp4',
+            'description': 'md5:695c01bfd98b7e313c501386327aea59',
+            'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
+            'thumbnail': 're:^http:.*\.jpg$',
+        }
     }]
 
     _FORMATS = {
@@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        if video_id.startswith('-'):
-            display_id = video_id.strip('-')
-        else:
-            display_id = video_id
-
+        video_id = self._match_id(url)
+        display_id = video_id.lstrip('-')
         webpage = self._download_webpage(url, display_id)
 
-        playerpage = self._download_webpage(
-            'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
-            display_id, 'Downloading player page')
-
-        medias = re.findall(
-            r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
-            playerpage)
-
-        formats = []
-        for url, ext, res in medias:
-            f = {
-                'format_id': res + '_' + ext,
-                'url': url,
-                'ext': ext,
-            }
-            f.update(self._FORMATS.get(res, {}))
-            formats.append(f)
-
-        thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+        player_url = self._html_search_meta(
+            'twitter:player', webpage, 'player URL', default=None)
+        if player_url:
+            playerpage = self._download_webpage(
+                player_url, display_id, 'Downloading player page')
+
+            medias = re.findall(
+                r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
+                playerpage)
+            formats = []
+            for url, ext, res in medias:
+                f = {
+                    'format_id': res + '_' + ext,
+                    'url': url,
+                    'ext': ext,
+                }
+                f.update(self._FORMATS.get(res, {}))
+                formats.append(f)
+            thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+            title = self._og_search_title(webpage).strip()
+            description = self._og_search_description(webpage).strip()
+        else:
+            download_text = self._search_regex(
+                r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
+                webpage, 'download links')
+            links = re.finditer(
+                r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
+                download_text)
+            formats = []
+            for l in links:
+                format_id = self._search_regex(
+                    r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
+                format = {
+                    'format_id': format_id,
+                    'url': l.group('url'),
+                    'format_name': l.group('name'),
+                }
+                m = re.match(
+                    r'''(?x)
+                        Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
+                        (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
+                        (?P<vbr>[0-9]+)kbps&\#10;
+                        Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
+                        Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
+                    l.group('title'))
+                if m:
+                    format.update({
+                        'format_note': m.group('audio_desc'),
+                        'vcodec': m.group('vcodec'),
+                        'width': int(m.group('width')),
+                        'height': int(m.group('height')),
+                        'abr': int(m.group('abr')),
+                        'vbr': int(m.group('vbr')),
+                        'filesize_approx': parse_filesize(m.group('filesize_approx')),
+                    })
+                formats.append(format)
+            thumbnail_fn = self._search_regex(
+                r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
+                webpage, 'thumbnail', fatal=False)
+            description = self._html_search_regex(
+                r'(?s)<p class="teasertext">(.*?)</p>',
+                webpage, 'description', fatal=False)
+            title = self._html_search_regex(
+                r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
 
         self._sort_formats(formats)
+        thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
 
         return {
             'id': display_id,
-            'title': self._og_search_title(webpage).strip(),
-            'thumbnail': 'http://www.tagesschau.de' + thumbnail,
+            'title': title,
+            'thumbnail': thumbnail,
             'formats': formats,
-            'description': self._og_search_description(webpage).strip(),
+            'description': description,
         }
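
For pages without a player, the new fallback branch mines the download buttons and parses codec, resolution, bitrates, and approximate file size out of each button's title attribute. A standalone check of that verbose regex against a hypothetical title string (&#10; is the HTML-encoded newline between the fields):

    import re

    title_attr = ('Video: H.264 &#10;960x544px&#10;1500kbps&#10;'
                  'Audio: 128kbps, AAC&#10;Gr&ouml;&szlig;e: 150 MB')

    m = re.match(
        r'''(?x)
            Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
            (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
            (?P<vbr>[0-9]+)kbps&\#10;
            Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
            Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
        title_attr)
    print(m.group('vcodec'), m.group('width'), m.group('abr'), m.group('filesize_approx'))
    # -> H.264 960 128 150 MB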

View File

@@ -0,0 +1,62 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    qualities,
+)
+
+
+class TassIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://tass.ru/obschestvo/1586870',
+            'md5': '3b4cdd011bc59174596b6145cda474a4',
+            'info_dict': {
+                'id': '1586870',
+                'ext': 'mp4',
+                'title': 'Посетителям московского зоопарка показали красную панду',
+                'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://itar-tass.com/obschestvo/1600009',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        sources = json.loads(js_to_json(self._search_regex(
+            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
+
+        quality = qualities(['sd', 'hd'])
+
+        formats = []
+        for source in sources:
+            video_url = source.get('file')
+            if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
+                continue
+            label = source.get('label')
+            formats.append({
+                'url': video_url,
+                'format_id': label,
+                'quality': quality(label),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }

View File

@@ -199,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor):
         webpage = self._download_webpage(url, name)
 
         config_json = self._html_search_regex(
-            r"data-config='([^']+)", webpage, 'config')
-        config = json.loads(config_json)
+            r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
+            webpage, 'config')
+        config = json.loads(config_json)['config']
         video_url = config['video']['url']
 
         thumbnail = config.get('image', {}).get('url')

View File

@@ -1,28 +1,28 @@
+from __future__ import unicode_literals
+
 import json
-import re
 
 from .common import InfoExtractor
 
 
 class TriluliluIE(InfoExtractor):
-    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
     _TEST = {
-        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
-        u'file': u"big-buck-bunny-1.mp4",
-        u'info_dict': {
-            u"title": u"Big Buck Bunny",
-            u"description": u":) pentru copilul din noi",
+        'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
+        'info_dict': {
+            'id': 'big-buck-bunny-1',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'description': ':) pentru copilul din noi',
         },
         # Server ignores Range headers (--test)
-        u"params": {
-            u"skip_download": True
+        'params': {
+            'skip_download': True
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._og_search_title(webpage)
@@ -30,20 +30,20 @@ class TriluliluIE(InfoExtractor):
         description = self._og_search_description(webpage)
 
         log_str = self._search_regex(
-            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
         log = json.loads(log_str)
 
-        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
-                      u'video-formats2' % log)
+        format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      'video-formats2' % log)
         format_doc = self._download_xml(
             format_url, video_id,
-            note=u'Downloading formats',
-            errnote=u'Error while downloading formats')
+            note='Downloading formats',
+            errnote='Error while downloading formats')
 
         video_url_template = (
-            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
-            u'&source=site&hash=%(hash)s&username=%(userid)s&'
-            u'key=ministhebest&format=%%s&sig=&exp=' %
+            'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            '&source=site&hash=%(hash)s&username=%(userid)s&'
+            'key=ministhebest&format=%%s&sig=&exp=' %
             log)
         formats = [
             {

View File

@@ -73,7 +73,7 @@ class TudouIE(InfoExtractor):
         result = []
         len_parts = len(parts)
         if len_parts > 1:
-            self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
+            self.to_screen('%s: found %s parts' % (video_id, len_parts))
         for part in parts:
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)

View File

@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
         |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
     )
     '''
+    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
 
     _INFO_DICT = {
         'id': '34682',
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         station_id = mobj.group('id')
 
-        webpage = self._download_webpage(
-            url, station_id, note='Downloading station webpage')
-
-        payload = self._html_search_regex(
-            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
-        json_data = json.loads(payload)
-        station_info = json_data['Station']['broadcast']
+        station_info = self._download_json(
+            self._API_URL_TEMPLATE.format(station_id),
+            station_id, note='Downloading station JSON')
+
         title = station_info['Title']
         thumbnail = station_info.get('Logo')
         location = station_info.get('Location')

View File

@@ -1,32 +1,30 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
-    str_to_int,
+    parse_age_limit,
 )
 
 
 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
 
     _TESTS = [
         {
-            'url': 'http://www.tvigle.ru/video/brat/',
-            'md5': 'ff4344a4894b0524441fb6f8218dc716',
+            'url': 'http://www.tvigle.ru/video/sokrat/',
+            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
             'info_dict': {
-                'id': '5118490',
-                'display_id': 'brat',
-                'ext': 'mp4',
-                'title': 'Брат',
-                'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
-                'duration': 5722.6,
-                'age_limit': 16,
+                'id': '1848932',
+                'display_id': 'sokrat',
+                'ext': 'flv',
+                'title': 'Сократ',
+                'description': 'md5:a05bd01be310074d5833efc6743be95e',
+                'duration': 6586,
+                'age_limit': 0,
             },
         },
         {
@@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
@@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
         title = item['title']
         description = item['description']
         thumbnail = item['thumbnail']
-        duration = float_or_none(item['durationMilliseconds'], 1000)
-        age_limit = str_to_int(item['ageRestrictions'])
+        duration = float_or_none(item.get('durationMilliseconds'), 1000)
+        age_limit = parse_age_limit(item.get('ageRestrictions'))
 
         formats = []
         for vcodec, fmts in item['videos'].items():

View File

@@ -1,40 +1,35 @@
-import json
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class TvpIE(InfoExtractor):
-    IE_NAME = u'tvp.pl'
+    IE_NAME = 'tvp.pl'
     _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
 
     _TEST = {
-        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
-        u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
-        u'file': u'12878238.wmv',
-        u'info_dict': {
-            u'title': u'31.10.2013 - Odcinek 2',
-            u'description': u'31.10.2013 - Odcinek 2',
+        'url': 'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+        'md5': '148408967a6a468953c0a75cbdaf0d7a',
+        'info_dict': {
+            'id': '12878238',
+            'ext': 'wmv',
+            'title': '31.10.2013 - Odcinek 2',
+            'description': '31.10.2013 - Odcinek 2',
         },
-        u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
+        'skip': 'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
-        json_params = self._download_webpage(
-            json_url, video_id, u"Downloading video metadata")
-
-        params = json.loads(json_params)
-        self.report_extraction(video_id)
+        params = self._download_json(
+            json_url, video_id, "Downloading video metadata")
         video_url = params['video_url']
 
-        title = self._og_search_title(webpage, fatal=True)
         return {
             'id': video_id,
-            'title': title,
+            'title': self._og_search_title(webpage),
             'ext': 'wmv',
             'url': video_url,
             'description': self._og_search_description(webpage),

View File

@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
+
+
+class TwentyFourVideoIE(InfoExtractor):
+    IE_NAME = '24video'
+    _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.24video.net/video/view/1044982',
+            'md5': '48dd7646775690a80447a8dca6a2df76',
+            'info_dict': {
+                'id': '1044982',
+                'ext': 'mp4',
+                'title': 'Эротика каменного века',
+                'description': 'Как смотрели порно в каменном веке.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'SUPERTELO',
+                'duration': 31,
+                'timestamp': 1275937857,
+                'upload_date': '20100607',
+                'age_limit': 18,
+                'like_count': int,
+                'dislike_count': int,
+            },
+        },
+        {
+            'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
+            'only_matching': True,
+        }
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://www.24video.net/video/view/%s' % video_id, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(
+            r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        duration = int_or_none(self._og_search_property(
+            'duration', webpage, 'duration', fatal=False))
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
+            webpage, 'upload date'))
+
+        uploader = self._html_search_regex(
+            r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>',
+            webpage, 'uploader', fatal=False)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'<span class="video-views">(\d+) просмотр',
+            webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._html_search_regex(
+            r'<div class="comments-title" id="comments-count">(\d+) комментари',
+            webpage, 'comment count', fatal=False))
+
+        formats = []
+
+        pc_video = self._download_xml(
+            'http://www.24video.net/video/xml/%s?mode=play' % video_id,
+            video_id, 'Downloading PC video URL').find('.//video')
+
+        formats.append({
+            'url': pc_video.attrib['url'],
+            'format_id': 'pc',
+            'quality': 1,
+        })
+
+        like_count = int_or_none(pc_video.get('ratingPlus'))
+        dislike_count = int_or_none(pc_video.get('ratingMinus'))
+        age_limit = 18 if pc_video.get('adult') == 'true' else 0
+
+        mobile_video = self._download_xml(
+            'http://www.24video.net/video/xml/%s' % video_id,
+            video_id, 'Downloading mobile video URL').find('.//video')
+
+        formats.append({
+            'url': mobile_video.attrib['url'],
+            'format_id': 'mobile',
+            'quality': 0,
+        })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'age_limit': age_limit,
+            'formats': formats,
+        }

View File

@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
@@ -5,6 +6,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
     ExtractorError,
     parse_iso8601,
 )
@@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):
     """
     _PAGE_LIMIT = 100
     _API_BASE = 'https://api.twitch.tv'
+    _LOGIN_URL = 'https://secure.twitch.tv/user/login'
 
     _TESTS = [{
         'url': 'http://www.twitch.tv/riotgames/b/577357806',
         'info_dict': {
@@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):
             'view_count': info['views'],
         }
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        authenticity_token = self._search_regex(
+            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
+            login_page, 'authenticity token')
+
+        login_form = {
+            'utf8': '✓'.encode('utf-8'),
+            'authenticity_token': authenticity_token,
+            'redirect_on_login': '',
+            'embed_form': 'false',
+            'mp_source_action': '',
+            'follow': '',
+            'user[login]': username,
+            'user[password]': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        request.add_header('Referer', self._LOGIN_URL)
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        m = re.search(
+            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
+        if m:
+            raise ExtractorError(
+                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj.group('chapterid'):
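
The added _login follows the classic form-login pattern: fetch the login page, scrape the hidden authenticity_token, then POST it back together with the credentials and a Referer header. A minimal Python 3 sketch of the same flow against the then-current login URL (cookie handling is done with a shared opener; error reporting is elided):

    import re
    import urllib.parse
    import urllib.request
    from http.cookiejar import CookieJar

    LOGIN_URL = 'https://secure.twitch.tv/user/login'

    def login(username, password):
        # one opener with a cookie jar, so the session cookie from the GET
        # is sent back with the POST
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(CookieJar()))
        login_page = opener.open(LOGIN_URL).read().decode('utf-8')
        token = re.search(
            r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
            login_page).group(1)
        data = urllib.parse.urlencode({
            'authenticity_token': token,
            'user[login]': username,
            'user[password]': password,
        }).encode('utf-8')
        request = urllib.request.Request(LOGIN_URL, data)
        request.add_header('Referer', LOGIN_URL)  # the form POST checks the referer
        return opener.open(request).read().decode('utf-8')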

View File

@@ -40,8 +40,24 @@ class UdemyIE(InfoExtractor):
error_str += ' - %s' % error_data.get('formErrors') error_str += ' - %s' % error_data.get('formErrors')
raise ExtractorError(error_str, expected=True) raise ExtractorError(error_str, expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'): def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
response = super(UdemyIE, self)._download_json(url, video_id, note) headers = {
'X-Udemy-Snail-Case': 'true',
'X-Requested-With': 'XMLHttpRequest',
}
for cookie in self._downloader.cookiejar:
if cookie.name == 'client_id':
headers['X-Udemy-Client-Id'] = cookie.value
elif cookie.name == 'access_token':
headers['X-Udemy-Bearer-Token'] = cookie.value
if isinstance(url_or_request, compat_urllib_request.Request):
for header, value in headers.items():
url_or_request.add_header(header, value)
else:
url_or_request = compat_urllib_request.Request(url_or_request, headers=headers)
response = super(UdemyIE, self)._download_json(url_or_request, video_id, note)
self._handle_error(response) self._handle_error(response)
return response return response
@@ -62,7 +78,9 @@ class UdemyIE(InfoExtractor):
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>': if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
return return
csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token') csrf = self._html_search_regex(
r'<input type="hidden" name="csrf" value="(.+?)"',
login_popup, 'csrf token')
login_form = { login_form = {
'email': username, 'email': username,
@@ -71,42 +89,49 @@ class UdemyIE(InfoExtractor):
             'displayType': 'json',
             'isSubmitted': '1',
         }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
-        response = self._download_json(request, None, 'Logging in as %s' % username)
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        response = self._download_json(
+            request, None, 'Logging in as %s' % username)
 
         if 'returnUrl' not in response:
             raise ExtractorError('Unable to log in')
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        lecture_id = mobj.group('id')
+        lecture_id = self._match_id(url)
 
         lecture = self._download_json(
-            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')
+            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
+            lecture_id, 'Downloading lecture JSON')
 
-        if lecture['assetType'] != 'Video':
-            raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)
+        asset_type = lecture.get('assetType') or lecture.get('asset_type')
+        if asset_type != 'Video':
+            raise ExtractorError(
+                'Lecture %s is not a video' % lecture_id, expected=True)
 
         asset = lecture['asset']
 
-        stream_url = asset['streamUrl']
+        stream_url = asset.get('streamUrl') or asset.get('stream_url')
         mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
         if mobj:
            return self.url_result(mobj.group(1), 'Youtube')
 
         video_id = asset['id']
-        thumbnail = asset['thumbnailUrl']
+        thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url')
         duration = asset['data']['duration']
 
-        download_url = asset['downloadUrl']
+        download_url = asset.get('downloadUrl') or asset.get('download_url')
+
+        video = download_url.get('Video') or download_url.get('video')
+        video_480p = download_url.get('Video480p') or download_url.get('video_480p')
+
         formats = [
             {
-                'url': download_url['Video480p'][0],
+                'url': video_480p[0],
                 'format_id': '360p',
             },
             {
-                'url': download_url['Video'][0],
+                'url': video[0],
                 'format_id': '720p',
             },
         ]
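The repeated asset.get('camelCase') or asset.get('snake_case') lookups above defend against the API serving either key style (the new X-Udemy-Snail-Case header requests the latter). If the pattern keeps spreading, a tiny helper keeps it readable — a sketch, not part of this change:

def get_either(d, *keys):
    # Return the first truthy value among alternative key spellings.
    for key in keys:
        value = d.get(key)
        if value:
            return value
    return None

# e.g. stream_url = get_either(asset, 'streamUrl', 'stream_url')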
@@ -140,25 +165,29 @@ class UdemyCourseIE(UdemyIE):
         course_path = mobj.group('coursepath')
 
         response = self._download_json(
-            'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON')
+            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
+            course_path, 'Downloading course JSON')
 
         course_id = int(response['id'])
         course_title = response['title']
 
         webpage = self._download_webpage(
-            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course')
+            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id,
+            course_id, 'Enrolling in the course')
 
         if self._SUCCESSFULLY_ENROLLED in webpage:
             self.to_screen('%s: Successfully enrolled in' % course_id)
         elif self._ALREADY_ENROLLED in webpage:
             self.to_screen('%s: Already enrolled in' % course_id)
 
-        response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
-                                       course_id, 'Downloading course curriculum')
+        response = self._download_json(
+            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
+            course_id, 'Downloading course curriculum')
 
         entries = [
-            self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
-            for asset in response if asset.get('assetType') == 'Video'
+            self.url_result(
+                'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
+            for asset in response if asset.get('assetType') or asset.get('asset_type') == 'Video'
         ]
 
         return self.playlist_result(entries, course_id, course_title)

youtube_dl/extractor/vevo.py

@@ -13,7 +13,7 @@ from ..utils import (
 class VevoIE(InfoExtractor):
     """
     Accepts urls from vevo.com or in the format 'vevo:{id}'
-    (currently used by MTVIE)
+    (currently used by MTVIE and MySpaceIE)
     """
     _VALID_URL = r'''(?x)
         (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|

youtube_dl/extractor/vgtv.py

@@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):
         'info_dict': {
             'id': '84196',
             'ext': 'mp4',
-            'title': 'Hevnen er søt episode 1:10 - Abu',
+            'title': 'Hevnen er søt: Episode 10 - Abu',
             'description': 'md5:e25e4badb5f544b04341e14abdc72234',
             'thumbnail': 're:^https?://.*\.jpg',
             'duration': 648.000,
@@ -35,7 +35,7 @@ class VGTVIE(InfoExtractor):
             'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
             'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
             'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 9056.000,
+            'duration': 9103.0,
             'timestamp': 1410113864,
             'upload_date': '20140907',
             'view_count': int,

youtube_dl/extractor/videofyme.py

@@ -1,32 +1,33 @@
-import re
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
     find_xpath_attr,
-    determine_ext,
+    int_or_none,
 )
 
 
 class VideofyMeIE(InfoExtractor):
-    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
-    IE_NAME = u'videofy.me'
+    _VALID_URL = r'https?://(?:www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
+    IE_NAME = 'videofy.me'
 
     _TEST = {
-        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
-        u'file': u'1100701.mp4',
-        u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
-        u'info_dict': {
-            u'title': u'This is VideofyMe',
-            u'description': None,
-            u'uploader': u'VideofyMe',
-            u'uploader_id': u'thisisvideofyme',
+        'url': 'http://www.videofy.me/thisisvideofyme/1100701',
+        'md5': 'c77d700bdc16ae2e9f3c26019bd96143',
+        'info_dict': {
+            'id': '1100701',
+            'ext': 'mp4',
+            'title': 'This is VideofyMe',
+            'description': None,
+            'uploader': 'VideofyMe',
+            'uploader_id': 'thisisvideofyme',
+            'view_count': int,
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+
         config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                     video_id)
         video = config.find('video')
@@ -34,14 +35,16 @@ class VideofyMeIE(InfoExtractor):
         url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
                                           for key in ['on', 'av', 'off']] if node is not None)
         video_url = url_node.find('url').text
+        view_count = int_or_none(self._search_regex(
+            r'([0-9]+)', video.find('views').text, 'view count', fatal=False))
 
-        return {'id': video_id,
-                'title': video.find('title').text,
-                'url': video_url,
-                'ext': determine_ext(video_url),
-                'thumbnail': video.find('thumb').text,
-                'description': video.find('description').text,
-                'uploader': config.find('blog/name').text,
-                'uploader_id': video.find('identifier').text,
-                'view_count': re.search(r'\d+', video.find('views').text).group(),
-                }
+        return {
+            'id': video_id,
+            'title': video.find('title').text,
+            'url': video_url,
+            'thumbnail': video.find('thumb').text,
+            'description': video.find('description').text,
+            'uploader': config.find('blog/name').text,
+            'uploader_id': video.find('identifier').text,
+            'view_count': view_count,
+        }
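The old code called .group() on a possibly-failed re.search, crashing whenever the view counter was absent; routing the value through a non-fatal _search_regex plus int_or_none degrades to None instead. The guard boils down to this plain-stdlib sketch (not the youtube-dl helpers themselves):

import re

def int_or_none(v):
    # youtube_dl.utils ships a richer version; this is the core idea.
    try:
        return int(v)
    except (TypeError, ValueError):
        return None

def first_int(text):
    mobj = re.search(r'([0-9]+)', text or '')
    return int_or_none(mobj.group(1) if mobj else None)

print(first_int('1337 views'))  # 1337
print(first_int('no counter'))  # None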

youtube_dl/extractor/videopremium.py

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import random
 
@@ -5,23 +7,22 @@ from .common import InfoExtractor
 class VideoPremiumIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
+    _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
     _TEST = {
-        u'url': u'http://videopremium.tv/4w7oadjsf156',
-        u'file': u'4w7oadjsf156.f4v',
-        u'info_dict': {
-            u"title": u"youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4"
+        'url': 'http://videopremium.tv/4w7oadjsf156',
+        'info_dict': {
+            'id': '4w7oadjsf156',
+            'ext': 'f4v',
+            'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
         },
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
         },
-        u'skip': u'Test file has been deleted.',
+        'skip': 'Test file has been deleted.',
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage_url = 'http://videopremium.tv/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
@@ -29,10 +30,10 @@ class VideoPremiumIE(InfoExtractor):
         # Download again, we need a cookie
         webpage = self._download_webpage(
             webpage_url, video_id,
-            note=u'Downloading webpage again (with cookie)')
+            note='Downloading webpage again (with cookie)')
 
         video_title = self._html_search_regex(
-            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
+            r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')
 
         return {
             'id': video_id,

youtube_dl/extractor/vine.py

@@ -63,29 +63,36 @@ class VineIE(InfoExtractor):
 class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
-    _TEST = {
-        'url': 'https://vine.co/Visa',
-        'info_dict': {
-            'id': 'Visa',
+    _TESTS = [
+        {
+            'url': 'https://vine.co/Visa',
+            'info_dict': {
+                'id': 'Visa',
+            },
+            'playlist_mincount': 46,
         },
-        'playlist_mincount': 46,
-    }
+        {
+            'url': 'https://vine.co/u/941705360593584128',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
+        u = mobj.group('u')
 
-        profile_url = "%sapi/users/profiles/vanity/%s" % (
-            self._VINE_BASE_URL, user)
+        profile_url = "%sapi/users/profiles/%s%s" % (
+            self._VINE_BASE_URL, 'vanity/' if not u else '', user)
         profile_data = self._download_json(
             profile_url, user, note='Downloading user profile data')
 
         user_id = profile_data['data']['userId']
         timeline_data = []
         for pagenum in itertools.count(1):
-            timeline_url = "%sapi/timelines/users/%s?page=%s" % (
-                self._VINE_BASE_URL, user_id, pagenum)
+            timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
+                self._VINE_BASE_URL, user_id, pagenum)
             timeline_page = self._download_json(
                 timeline_url, user, note='Downloading page %d' % pagenum)
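The loop above walks the timeline API one page at a time; the added &size=100 parameter only reduces the number of round trips. Stripped of extractor details, the pattern is (fetch_page is a placeholder for the actual request):

import itertools

def fetch_all(fetch_page):
    # fetch_page(n) -> list of records for page n; an empty page ends the walk.
    records = []
    for pagenum in itertools.count(1):
        page = fetch_page(pagenum)
        if not page:
            break
        records.extend(page)
    return records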

youtube_dl/extractor/xminus.py

@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_chr,
@@ -25,6 +27,7 @@ class XMinusIE(InfoExtractor):
             'tbr': 320,
             'filesize_approx': 5900000,
             'view_count': int,
+            'description': 'md5:03238c5b663810bc79cf42ef3c03e371',
         }
     }
@@ -48,9 +51,14 @@ class XMinusIE(InfoExtractor):
         view_count = int_or_none(self._html_search_regex(
             r'<div class="quality.*?► ([0-9]+)',
             webpage, 'view count', fatal=False))
+        description = self._html_search_regex(
+            r'(?s)<div id="song_texts">(.*?)</div><br',
+            webpage, 'song lyrics', fatal=False)
+        if description:
+            description = re.sub(' *\r *', '\n', description)
 
         enc_token = self._html_search_regex(
-            r'data-mt="(.*?)"', webpage, 'enc_token')
+            r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
         token = ''.join(
             c if pos == 3 else compat_chr(compat_ord(c) - 1)
             for pos, c in enumerate(reversed(enc_token)))
@@ -64,4 +72,5 @@ class XMinusIE(InfoExtractor):
             'filesize_approx': filesize_approx,
             'tbr': tbr,
             'view_count': view_count,
+            'description': description,
         }
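The token above is recovered by reversing the scraped string and shifting every character except the one at index 3 down by one code point. Standalone, that transform looks like this (pure stdlib; compat_chr/compat_ord are just chr/ord on Python 3):

def decode_token(enc_token):
    # Reverse, then shift each char down one code point,
    # leaving the character at position 3 untouched.
    return ''.join(
        c if pos == 3 else chr(ord(c) - 1)
        for pos, c in enumerate(reversed(enc_token)))

print(decode_token('bcde2fg'))  # -> 'fe1ecba'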

youtube_dl/extractor/youporn.py

@@ -45,11 +45,13 @@ class YouPornIE(InfoExtractor):
         age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
+        json_params = self._search_regex(
+            r'var currentVideo = new Video\((.*)\)[,;]',
+            webpage, 'JSON parameters')
         try:
             params = json.loads(json_params)
         except:
-            raise ExtractorError(u'Invalid JSON')
+            raise ExtractorError('Invalid JSON')
 
         self.report_extraction(video_id)
         try:
@@ -103,7 +105,7 @@ class YouPornIE(InfoExtractor):
         self._sort_formats(formats)
 
         if not formats:
-            raise ExtractorError(u'ERROR: no known formats available for video')
+            raise ExtractorError('ERROR: no known formats available for video')
 
         return {
             'id': video_id,

youtube_dl/extractor/youtube.py

@@ -7,6 +7,7 @@ import itertools
 import json
 import os.path
 import re
+import time
 import traceback
 
 from .common import InfoExtractor, SearchInfoExtractor
@@ -38,17 +39,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
-    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
-    _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
     _NETRC_MACHINE = 'youtube'
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
 
     def _set_language(self):
-        return bool(self._download_webpage(
-            self._LANG_URL, None,
-            note='Setting language', errnote='unable to set language',
-            fatal=False))
+        self._set_cookie(
+            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            # YouTube sets the expire time to about two months
+            expire_time=time.time() + 2 * 30 * 24 * 3600)
 
     def _login(self):
         """
@@ -176,30 +175,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 return False
         return True
 
-    def _confirm_age(self):
-        age_form = {
-            'next_url': '/',
-            'action_confirm': 'Confirm',
-        }
-        req = compat_urllib_request.Request(
-            self._AGE_URL,
-            compat_urllib_parse.urlencode(age_form).encode('ascii')
-        )
-        self._download_webpage(
-            req, None,
-            note='Confirming age', errnote='Unable to confirm age',
-            fatal=False)
-
     def _real_initialize(self):
         if self._downloader is None:
             return
-        if self._get_login_info()[0] is not None:
-            if not self._set_language():
-                return
+        self._set_language()
         if not self._login():
             return
-        self._confirm_age()
 
 
 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
@@ -305,6 +286,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -398,8 +380,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'info_dict': {
                 'id': 'IB3lcPjvWLA',
                 'ext': 'm4a',
-                'title': 'Afrojack - The Spark ft. Spree Wilson',
-                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
+                'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
@@ -421,6 +403,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
+        },
+        # Normal age-gate video (No vevo, embed allowed)
+        {
+            'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
+            'info_dict': {
+                'id': 'HtVdAasjOgU',
+                'ext': 'mp4',
+                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'uploader': 'The Witcher',
+                'uploader_id': 'WitcherGame',
+                'upload_date': '20140605',
+            },
+        },
+        # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+        {
+            'url': '__2ABJjxzNo',
+            'info_dict': {
+                'id': '__2ABJjxzNo',
+                'ext': 'mp4',
+                'upload_date': '20100430',
+                'uploader_id': 'deadmau5',
+                'description': 'md5:12c56784b8032162bb936a5f76d55360',
+                'uploader': 'deadmau5',
+                'title': 'Deadmau5 - Some Chords (HD)',
+            },
+            'expected_warnings': [
+                'DASH manifest missing',
+            ]
         }
     ]
@@ -609,9 +620,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             return {}
         player_config = json.loads(mobj.group(1))
         try:
-            args = player_config[u'args']
-            caption_url = args[u'ttsurl']
-            timestamp = args[u'timestamp']
+            args = player_config['args']
+            caption_url = args['ttsurl']
+            timestamp = args['timestamp']
             # We get the available subtitles
             list_params = compat_urllib_parse.urlencode({
                 'type': 'list',
@@ -671,6 +682,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
 
+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
+        def decrypt_sig(mobj):
+            s = mobj.group(1)
+            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+            return '/signature/%s' % dec_s
+        dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
+        dash_doc = self._download_xml(
+            dash_manifest_url, video_id,
+            note='Downloading DASH manifest',
+            errnote='Could not download DASH manifest')
+
+        formats = []
+        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+            if url_el is None:
+                continue
+            format_id = r.attrib['id']
+            video_url = url_el.text
+            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+            f = {
+                'format_id': format_id,
+                'url': video_url,
+                'width': int_or_none(r.attrib.get('width')),
+                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                'filesize': filesize,
+                'fps': int_or_none(r.attrib.get('frameRate')),
+            }
+            try:
+                existing_format = next(
+                    fo for fo in formats
+                    if fo['format_id'] == format_id)
+            except StopIteration:
+                f.update(self._formats.get(format_id, {}))
+                formats.append(f)
+            else:
+                existing_format.update(f)
+        return formats
+
     def _real_extract(self, url):
         proto = (
             'http' if self._downloader.params.get('prefer_insecure', False)
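The new _parse_dash_manifest above first resolves encrypted signatures in the manifest URL, then collects one format per Representation element. Walking such an MPD needs only ElementTree and the DASH namespace in Clark notation — a toy sketch with an inline manifest standing in for a real download:

import xml.etree.ElementTree as ET

NS = '{urn:mpeg:DASH:schema:MPD:2011}'
mpd = ET.fromstring(
    '<MPD xmlns="urn:mpeg:DASH:schema:MPD:2011"><Period><AdaptationSet>'
    '<Representation id="137" bandwidth="4000000" width="1920">'
    '<BaseURL>http://example.com/video.mp4</BaseURL>'
    '</Representation></AdaptationSet></Period></MPD>')

for rep in mpd.findall('.//%sRepresentation' % NS):
    base_url = rep.find(NS + 'BaseURL')
    if base_url is None:
        continue  # a Representation without a BaseURL is unusable here
    print(rep.attrib['id'], rep.attrib.get('bandwidth'), base_url.text)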
@@ -684,16 +735,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Get video webpage
         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
-        pref_cookies = [
-            c for c in self._downloader.cookiejar
-            if c.domain == '.youtube.com' and c.name == 'PREF']
-        for pc in pref_cookies:
-            if 'hl=' in pc.value:
-                pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
-            else:
-                if pc.value:
-                    pc.value += '&'
-                pc.value += 'hl=en'
         video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
@@ -704,7 +745,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             player_url = None
 
         # Get video info
-        self.report_video_info_webpage_download(video_id)
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -723,15 +763,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             video_info = compat_parse_qs(video_info_webpage)
         else:
             age_gate = False
-            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                                  % (video_id, el_type))
-                video_info_webpage = self._download_webpage(video_info_url, video_id,
-                                                            note=False,
-                                                            errnote='unable to download video info webpage')
-                video_info = compat_parse_qs(video_info_webpage)
-                if 'token' in video_info:
-                    break
+            try:
+                # Try looking directly into the video webpage
+                mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
+                if not mobj:
+                    raise ValueError('Could not find ytplayer.config')  # caught below
+                json_code = uppercase_escape(mobj.group(1))
+                ytplayer_config = json.loads(json_code)
+                args = ytplayer_config['args']
+                # Convert to the same format returned by compat_parse_qs
+                video_info = dict((k, [v]) for k, v in args.items())
+                if 'url_encoded_fmt_stream_map' not in args:
+                    raise ValueError('No stream_map present')  # caught below
+            except ValueError:
+                # We fallback to the get_video_info pages (used by the embed page)
+                self.report_video_info_webpage_download(video_id)
+                for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                    video_info_url = (
+                        '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (proto, video_id, el_type))
+                    video_info_webpage = self._download_webpage(
+                        video_info_url,
+                        video_id, note=False,
+                        errnote='unable to download video info webpage')
+                    video_info = compat_parse_qs(video_info_webpage)
+                    if 'token' in video_info:
+                        break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(
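The ytplayer.config trick above pulls a JavaScript object literal out of the page with a non-greedy regex and parses it as JSON; it only works while the object happens to be valid single-line JSON, which is why the ValueError fallback stays in place. A self-contained sketch of the technique (toy page string, not real YouTube markup):

import json
import re

page = 'var x = 1;ytplayer.config = {"args": {"title": "demo"}};f();'
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', page)
if mobj:
    config = json.loads(mobj.group(1))
    print(config['args']['title'])  # -> demo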
@@ -856,32 +913,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if self._downloader.params.get('writeannotations', False):
             video_annotations = self._extract_annotations(video_id)
 
-        # Decide which formats to download
-        try:
-            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
-            if not mobj:
-                raise ValueError('Could not find vevo ID')
-            json_code = uppercase_escape(mobj.group(1))
-            ytplayer_config = json.loads(json_code)
-            args = ytplayer_config['args']
-            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
-            # this signatures are encrypted
-            if 'url_encoded_fmt_stream_map' not in args:
-                raise ValueError('No stream_map present')  # caught below
-            re_signature = re.compile(r'[&,]s=')
-            m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
-            if m_s is not None:
-                self.to_screen('%s: Encrypted signatures detected.' % video_id)
-                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re_signature.search(args.get('adaptive_fmts', ''))
-            if m_s is not None:
-                if 'adaptive_fmts' in video_info:
-                    video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
-                else:
-                    video_info['adaptive_fmts'] = [args['adaptive_fmts']]
-        except ValueError:
-            pass
-
         def _map_to_format_list(urlmap):
             formats = []
             for itag, video_real_url in urlmap.items():
@@ -968,53 +999,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
-            try:
-                # The DASH manifest used needs to be the one from the original video_webpage.
-                # The one found in get_video_info seems to be using different signatures.
-                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
-                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
-                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
-                if age_gate:
-                    dash_manifest_url = video_info.get('dashmpd')[0]
-                else:
-                    dash_manifest_url = ytplayer_config['args']['dashmpd']
-
-                def decrypt_sig(mobj):
-                    s = mobj.group(1)
-                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
-                    return '/signature/%s' % dec_s
-                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
-                dash_doc = self._download_xml(
-                    dash_manifest_url, video_id,
-                    note='Downloading DASH manifest',
-                    errnote='Could not download DASH manifest')
-                for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-                    if url_el is None:
-                        continue
-                    format_id = r.attrib['id']
-                    video_url = url_el.text
-                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-                    f = {
-                        'format_id': format_id,
-                        'url': video_url,
-                        'width': int_or_none(r.attrib.get('width')),
-                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                        'filesize': filesize,
-                    }
-                    try:
-                        existing_format = next(
-                            fo for fo in formats
-                            if fo['format_id'] == format_id)
-                    except StopIteration:
-                        f.update(self._formats.get(format_id, {}))
-                        formats.append(f)
-                    else:
-                        existing_format.update(f)
-            except (ExtractorError, KeyError) as e:
-                self.report_warning('Skipping DASH manifest: %r' % e, video_id)
+            dash_mpd = video_info.get('dashmpd')
+            if not dash_mpd:
+                self.report_warning('%s: DASH manifest missing' % video_id)
+            else:
+                dash_manifest_url = dash_mpd[0]
+                try:
+                    dash_formats = self._parse_dash_manifest(
+                        video_id, dash_manifest_url, player_url, age_gate)
+                except (ExtractorError, KeyError) as e:
+                    self.report_warning(
+                        'Skipping DASH manifest: %r' % e, video_id)
+                else:
+                    formats.extend(dash_formats)
 
         self._sort_formats(formats)
@@ -1256,7 +1253,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
-    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
+    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
@@ -1274,13 +1271,8 @@ class YoutubeChannelIE(InfoExtractor):
         return ids_in_page
 
     def _real_extract(self, url):
-        # Extract channel id
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
+        channel_id = self._match_id(url)
 
-        # Download channel page
-        channel_id = mobj.group(1)
         video_ids = []
         url = 'https://www.youtube.com/channel/%s/videos' % channel_id
         channel_page = self._download_webpage(url, channel_id)
@@ -1294,8 +1286,12 @@ class YoutubeChannelIE(InfoExtractor):
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
             video_ids = self.extract_videos_from_page(channel_page)
-        else:
-            # Download all channel pages using the json-based channel_ajax query
+            entries = [
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in video_ids]
+            return self.playlist_result(entries, channel_id)
+
+        def _entries():
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
                 page = self._download_json(
@@ -1303,21 +1299,19 @@ class YoutubeChannelIE(InfoExtractor):
                     transform_source=uppercase_escape)
 
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+                for video_id in ids_in_page:
+                    yield self.url_result(
+                        video_id, 'Youtube', video_id=video_id)
 
                 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                     break
 
-        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
-        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                       for video_id in video_ids]
-        return self.playlist_result(url_entries, channel_id)
+        return self.playlist_result(_entries(), channel_id)
 
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
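In the channel rework above, _entries turns paging into a generator, so url results are yielded as each AJAX page arrives instead of first being accumulated into video_ids; playlist_result can now consume such an iterator directly. Reduced to a sketch (fetch_page and has_more are placeholders for the real download and stop condition):

import itertools

def lazy_entries(fetch_page, has_more):
    # Downstream code that stops early never triggers the remaining fetches.
    for pagenum in itertools.count(1):
        page = fetch_page(pagenum)
        for item in page['items']:
            yield item
        if not has_more(page):
            break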
@@ -1345,12 +1339,7 @@ class YoutubeUserIE(InfoExtractor):
         return super(YoutubeUserIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        # Extract username
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-
-        username = mobj.group(1)
+        username = self._match_id(url)
 
         # Download video ids using YouTube Data API. Result size per
         # query is limited (currently to 50 videos) so we need to query

youtube_dl/options.py

@@ -163,7 +163,10 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--ignore-config',
         action='store_true',
-        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+        help='Do not read configuration files. '
+        'When given in the global configuration file /etc/youtube-dl.conf: '
+        'Do not read the user configuration in ~/.config/youtube-dl/config '
+        '(%APPDATA%/youtube-dl/config.txt on Windows)')
     general.add_option(
         '--flat-playlist',
         action='store_const', dest='extract_flat', const='in_playlist',
@@ -222,7 +225,7 @@ def parseOpts(overrideArguments=None):
     selection.add_option(
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
-        help='download only the currently playing video')
+        help='If the URL refers to a video and a playlist, download only the video.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,

youtube_dl/postprocessor/__init__.py

@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 
 from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (

youtube_dl/postprocessor/common.py

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from ..utils import PostProcessingError

youtube_dl/postprocessor/execafterdownload.py

@@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
     def run(self, information):
         cmd = self.exec_cmd
-        if not '{}' in cmd:
+        if '{}' not in cmd:
             cmd += ' {}'
 
         cmd = cmd.replace('{}', shlex_quote(information['filepath']))
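The {} placeholder is appended when the user's template lacks one, then replaced with the shell-quoted file path. A self-contained sketch of that substitution (Python 3 stdlib; youtube-dl's shlex_quote is a compat shim that maps to shlex.quote there):

from shlex import quote  # Python 3.3+

def build_command(template, filepath):
    # Append the placeholder if the template lacks one,
    # then substitute the safely quoted path.
    if '{}' not in template:
        template += ' {}'
    return template.replace('{}', quote(filepath))

print(build_command('ffprobe', "it's a video.mp4"))
# ffprobe 'it'"'"'s a video.mp4'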

youtube_dl/postprocessor/ffmpeg.py

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import subprocess
 import sys
@@ -33,13 +35,13 @@ class FFmpegPostProcessor(PostProcessor):
     def check_version(self):
         if not self._executable:
-            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+            raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
 
-        REQUIRED_VERSION = '1.0'
+        required_version = '10-0' if self._uses_avconv() else '1.0'
         if is_outdated_version(
-                self._versions[self._executable], REQUIRED_VERSION):
-            warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
-                self._executable, self._executable, REQUIRED_VERSION)
+                self._versions[self._executable], required_version):
+            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+                self._executable, self._executable, required_version)
         if self._downloader:
             self._downloader.report_warning(warning)
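The change above picks a different version floor depending on whether avconv is in use, since avconv reports versions like '10-0'. The core "is this older than the floor?" check amounts to a numeric tuple comparison — a rough sketch, not the actual is_outdated_version from youtube_dl.utils, which has to be more forgiving about vendor version strings:

import re

def version_tuple(v):
    # '2.1.4' -> (2, 1, 4); non-numeric chunks are ignored.
    return tuple(int(p) for p in re.findall(r'\d+', v))

def is_outdated(version, required):
    return version_tuple(version) < version_tuple(required)

print(is_outdated('0.9.7', '1.0'))  # True
print(is_outdated('2.1', '1.0'))    # False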
@@ -84,7 +86,7 @@ class FFmpegPostProcessor(PostProcessor):
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
         if self._downloader.params.get('verbose', False):
-            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
+            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = p.communicate()
         if p.returncode != 0:
@@ -100,8 +102,8 @@ class FFmpegPostProcessor(PostProcessor):
 
     def _ffmpeg_filename_argument(self, fn):
         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
-        if fn.startswith(u'-'):
-            return u'./' + fn
+        if fn.startswith('-'):
+            return './' + fn
         return fn
@@ -117,7 +119,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
     def get_audio_codec(self, path):
         if not self._probe_executable:
-            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
+            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
                 self._probe_executable,
@@ -153,7 +155,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             filecodec = self.get_audio_codec(path)
             if filecodec is None:
-                raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
+                raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
 
             uses_avconv = self._uses_avconv()
             more_opts = []
@@ -202,7 +204,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             extension = 'wav'
             more_opts += ['-f', 'wav']
 
-        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
+        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
@@ -211,16 +213,16 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
+                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
             else:
-                self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
+                self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
                 self.run_ffmpeg(path, new_path, acodec, more_opts)
         except:
             etype, e, tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.msg
+                msg = 'audio conversion failed: ' + e.msg
             else:
-                msg = u'error running ' + self._executable
+                msg = 'error running ' + self._executable
             raise PostProcessingError(msg)
 
         # Try to update the date time for extracted audio file.
@@ -228,7 +230,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             try:
                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
             except:
-                self._downloader.report_warning(u'Cannot update utime of audio file')
+                self._downloader.report_warning('Cannot update utime of audio file')
 
         information['filepath'] = new_path
         return self._nopostoverwrites, information
@@ -241,12 +243,12 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
     def run(self, information):
         path = information['filepath']
-        prefix, sep, ext = path.rpartition(u'.')
+        prefix, sep, ext = path.rpartition('.')
         outpath = prefix + sep + self._preferedformat
         if information['ext'] == self._preferedformat:
-            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+            self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
             return True, information
-        self._downloader.to_screen(u'[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
+        self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
         self.run_ffmpeg(path, outpath, [])
         information['filepath'] = outpath
         information['format'] = self._preferedformat
@@ -453,11 +455,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         return cls._lang_map.get(code[:2])
 
     def run(self, information):
-        if information['ext'] != u'mp4':
-            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+        if information['ext'] != 'mp4':
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
         if not information.get('subtitles'):
-            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
         sub_langs = [key for key in information['subtitles']]
@@ -472,8 +474,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
             opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 
         opts.extend(['-f', 'mp4'])
-        temp_filename = filename + u'.temp'
-        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+        temp_filename = filename + '.temp'
+        self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -494,13 +496,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['artist'] = info['uploader_id']
 
         if not metadata:
-            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
             return True, info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
 
-        if info['ext'] == u'm4a':
+        if info['ext'] == 'm4a':
             options = ['-vn', '-acodec', 'copy']
         else:
             options = ['-c', 'copy']
@@ -508,7 +510,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
-        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+        self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -519,7 +521,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
         args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
-        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
+        self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
@@ -530,7 +532,7 @@ class FFmpegAudioFixPP(FFmpegPostProcessor):
         temp_filename = prepend_extension(filename, 'temp')
         options = ['-vn', '-acodec', 'copy']
-        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import os import os
import subprocess import subprocess
import sys import sys

Some files were not shown because too many files have changed in this diff.