Compare commits

..

200 Commits

Author SHA1 Message Date
Philipp Hagemeister
0037e02921 release 2013.12.02 2013-12-02 13:37:26 +01:00
Philipp Hagemeister
6ad14cab59 Add --socket-timeout option 2013-12-02 13:37:05 +01:00
Philipp Hagemeister
a9be0cc736 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-02 13:36:20 +01:00
Jaime Marquínez Ferrándiz
55a10eab48 [vimeo] Add an extractor for users (closes #1871) 2013-12-01 22:36:18 +01:00
Philipp Hagemeister
e344693b65 Make socket timeout configurable, and bump default to 10 minutes (#1862) 2013-12-01 11:42:02 +01:00
Philipp Hagemeister
355e4fd07e [generic] Find embedded dailymotion videos (Fixes #1848) 2013-12-01 01:21:33 +01:00
Philipp Hagemeister
5e09d6abbd [clipfish] Skip test on travis 2013-12-01 01:16:20 +01:00
Philipp Hagemeister
b138de72f2 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-30 00:42:56 +01:00
Philipp Hagemeister
06dcbb71d8 Clarify help of --write-pages (#1853) 2013-11-30 00:42:43 +01:00
Jaime Marquínez Ferrándiz
c5171c454b [yahoo] Force use of the http protocol for downloading the videos. 2013-11-29 22:06:17 +01:00
Philipp Hagemeister
323ec6ae56 Clarify --download-archive help 2013-11-29 15:57:43 +01:00
Jaime Marquínez Ferrándiz
befd88b786 [yahoo] Add an extractor for yahoo news (closes #1849) 2013-11-29 15:25:43 +01:00
Philipp Hagemeister
a3fb4675fb Do not mutate default arguments
In this case, it looks rather harmless (since the conditions for --restrict-filenames should not change while a process is running), but just to be sure.
This also simplifies the interface for callers, who can just pass in the idiomatic None for "I don't care, whatever is the default".
2013-11-29 15:25:11 +01:00
Philipp Hagemeister
5f077efcb1 Merge pull request #1850 from nikai3d/master
fix typo in help
2013-11-29 01:48:14 -08:00
Nicolas Kaiser
9986238ba9 fix typo in help 2013-11-29 09:48:38 +01:00
Nicolas Kaiser
e1f900d6a4 fix typo in README.md 2013-11-29 09:44:05 +01:00
Jaime Marquínez Ferrándiz
acf37ca151 [imdb] Fix the resolution values (fixes #1847)
We were using the size of the player, it was the same for all the formats
2013-11-29 07:56:14 +01:00
Philipp Hagemeister
17769d5a6c release 2013.11.29 2013-11-29 03:34:26 +01:00
Philipp Hagemeister
677c18092d [podomatic] Add extractor 2013-11-29 03:33:25 +01:00
Jaime Marquínez Ferrándiz
3862402ff3 Add an extractor for Clipsyndicate (closes #1744) 2013-11-28 14:38:10 +01:00
Jaime Marquínez Ferrándiz
b03d0d064c [imdb] Fix extraction in python 2.6
Using a regular expression because the html cannot be parsed.
2013-11-28 13:49:00 +01:00
Jaime Marquínez Ferrándiz
d8d6148628 Add an extractor for Internet Movie Database trailers (closes #1832) 2013-11-28 13:32:49 +01:00
Philipp Hagemeister
2be54167d0 release 2013.11.28.1 2013-11-28 06:17:56 +01:00
Philipp Hagemeister
4e0084d92e [youtube/subtitles] Change MD5 of vtt subtitle in test 2013-11-28 06:14:17 +01:00
Philipp Hagemeister
fc9e1cc697 [clipfish] Use FIFA trailer as testcase (#1842) 2013-11-28 06:10:37 +01:00
Philipp Hagemeister
f8f60d2793 [clipfish] Fix imports (#1842) 2013-11-28 05:54:46 +01:00
Philipp Hagemeister
ea07dbb8b1 release 2013.11.28 2013-11-28 05:48:32 +01:00
Philipp Hagemeister
2a275ab007 [zdf] Use _download_xml 2013-11-28 05:47:50 +01:00
Philipp Hagemeister
a2e6db365c [zdf] add a pseudo-testcase and fix URL matching 2013-11-28 05:47:20 +01:00
Philipp Hagemeister
9d93e7da6c Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-28 04:37:02 +01:00
Jaime Marquínez Ferrándiz
0e44d8381a [youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845) 2013-11-28 00:33:27 +01:00
Jaime Marquínez Ferrándiz
35907e23ec [yahoo] Fix video extraction and use the new format system exclusively 2013-11-27 21:24:55 +01:00
Jaime Marquínez Ferrándiz
76d1700b28 [youtube:playlist] Fix the extraction of the title for some mixes (#1844)
Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM
2013-11-27 20:01:51 +01:00
Philipp Hagemeister
dcca796ce4 [clipfish] Effect a better error message (#1842) 2013-11-27 18:33:51 +01:00
Filippo Valsorda
4b19e38954 [videopremium] support new .me domain 2013-11-27 02:54:51 +01:00
Jaime Marquínez Ferrándiz
5f09bbff4d [bash-completion] Complete the ':ythistory' keyword 2013-11-27 00:42:59 +01:00
Jaime Marquínez Ferrándiz
c1f9c59d11 [bash-completion] Complete filenames or directories if the previous option requires it 2013-11-27 00:41:30 +01:00
Jaime Marquínez Ferrándiz
652cdaa269 [youtube:playlist] Add support for YouTube mixes (fixes #1839) 2013-11-26 21:35:03 +01:00
Jaime Marquínez Ferrándiz
e26f871228 Use the new '_download_xml' helper in more extractors 2013-11-26 19:17:25 +01:00
Jaime Marquínez Ferrándiz
6e47b51eef [youtube:playlist] Remove the link with index 0
It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
2013-11-26 19:09:14 +01:00
Jaime Marquínez Ferrándiz
4a98cdbf3b YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840)
If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''
2013-11-26 18:54:14 +01:00
Philipp Hagemeister
c5ed4e8f7e release 2013.11.26 2013-11-26 10:41:35 +01:00
Jaime Marquínez Ferrándiz
c2e52508cc Include the proxy in the parameters for YoutubeDL (fixes #1831) 2013-11-26 08:03:11 +01:00
Philipp Hagemeister
d8ec4959c8 Merge pull request #1830 from jaimeMF/download-archive
Use the 'extractor_key' field for the download archive file
2013-11-25 14:14:25 -08:00
Jaime Marquínez Ferrándiz
d31209a144 Use the 'extractor_key' field for the download archive file
It has the same value as the ie_key.
2013-11-25 22:57:15 +01:00
Jaime Marquínez Ferrándiz
529a2e2cc3 Fix typo in the documentation of the 'download_archive' param 2013-11-25 22:52:09 +01:00
Philipp Hagemeister
781a7d0546 release 2013.11.25.3 2013-11-25 22:36:18 +01:00
Philipp Hagemeister
fb04e40396 [soundcloud] Support for listing of audio-only files 2013-11-25 22:34:56 +01:00
Philipp Hagemeister
d9b011f201 Fix rtmpdump with non-ASCII filenames on Windows on 2.x
Reported in #1798
2013-11-25 22:31:38 +01:00
Philipp Hagemeister
b0b9eaa196 Merge pull request #1829 from jaimeMF/ydl-empty-params
Allow to initialize a YoutubeDL object without parameters
2013-11-25 13:19:59 -08:00
Philipp Hagemeister
8b134b1062 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-25 22:16:07 +01:00
Philipp Hagemeister
0c75c3fa7a Do not warn about fixed output template if --max-downloads is 1
Fixes #1828
2013-11-25 22:15:33 +01:00
Jaime Marquínez Ferrándiz
a3927cf7ee Allow to initialize a YoutubeDL object without parameters
Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.
2013-11-25 22:03:39 +01:00
Jaime Marquínez Ferrándiz
1a62c18f65 [bambuser] Skip the download in the test
It doesn't respect the 'Range' header.
2013-11-25 22:03:20 +01:00
Philipp Hagemeister
2a15e7063b [soundcloud] Prefer HTTP over RTMP (#1798) 2013-11-25 20:30:41 +01:00
Philipp Hagemeister
d46cc192d7 Reduce socket timeout 2013-11-25 19:11:01 +01:00
Philipp Hagemeister
bb2bebdbe1 release 2013.11.25.2 2013-11-25 15:47:14 +01:00
Philipp Hagemeister
5db07df634 Fix --download-archive (Fixes #1826) 2013-11-25 15:46:54 +01:00
Philipp Hagemeister
ea36cbac5e Merge remote-tracking branch 'rbrito/swap-dimensions' 2013-11-25 06:19:15 +01:00
Philipp Hagemeister
d0d2b49ab7 [FileDownloader] use moved format_bytes method 2013-11-25 06:17:41 +01:00
Philipp Hagemeister
31cb6d8fef Merge remote-tracking branch 'rzhxeo/rtmpdump' 2013-11-25 06:16:18 +01:00
Philipp Hagemeister
daa0dd2973 release 2013.11.25.1 2013-11-25 06:06:39 +01:00
Philipp Hagemeister
de79c46c8f [viki] Fix subtitle extraction 2013-11-25 06:06:18 +01:00
Philipp Hagemeister
94ccb6fa2e [viki] Fix subtitles extraction 2013-11-25 05:58:04 +01:00
Philipp Hagemeister
07e4035879 [viki] Fix uploader extraction 2013-11-25 05:57:55 +01:00
Philipp Hagemeister
d0efb9ec9a [tests] Remove global_setup function 2013-11-25 03:47:32 +01:00
Philipp Hagemeister
ac05067d3d release 2013.11.25 2013-11-25 03:37:49 +01:00
Philipp Hagemeister
113577e155 [generic] Improve detection
Allow download of http://goo.gl/7X5tOk
Fixes #1818
2013-11-25 03:35:53 +01:00
Philipp Hagemeister
79d09f47c2 Merge branch 'opener-to-ydl' 2013-11-25 03:30:37 +01:00
Philipp Hagemeister
c059bdd432 Remove quality_name field and improve zdf extractor 2013-11-25 03:28:55 +01:00
Philipp Hagemeister
02dbf93f0e [zdf/common] Use API in ZDF extractor.
This also comes with a lot of extra format fields
Fixes #1518
2013-11-25 03:13:22 +01:00
Philipp Hagemeister
1fb2bcbbf7 [viki] Make uploader field optional (#1813) 2013-11-25 02:02:34 +01:00
Jaime Marquínez Ferrándiz
16e055849e Update the keywords tests for the rename of the old ComedyCentralIE 2013-11-24 22:13:20 +01:00
Jaime Marquínez Ferrándiz
66cfab4226 [comedycentral] Add support for comedycentral.com videos (closes #1824)
It's a subclass of MTVIE

The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE
2013-11-24 21:18:35 +01:00
Philipp Hagemeister
6d88bc37a3 [viki] Skip travis test
Also provide a better error message for geoblocked videos.
2013-11-24 15:28:50 +01:00
Philipp Hagemeister
b7553b2554 [viki] Clarify output 2013-11-24 15:20:16 +01:00
Philipp Hagemeister
e03db0a077 Merge branch 'master' into opener-to-ydl 2013-11-24 15:18:44 +01:00
Philipp Hagemeister
a1ee09e815 Document proxy 2013-11-24 15:03:25 +01:00
Jaime Marquínez Ferrándiz
267ed0c5d3 [collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
2013-11-24 14:59:19 +01:00
Jaime Marquínez Ferrándiz
f459d17018 [youtube] Add an extractor for downloading the watch history (closes #1821) 2013-11-24 14:33:50 +01:00
Jaime Marquínez Ferrándiz
dc65dcbb6d [mixcloud] The description field may be missing (fixes #1819) 2013-11-24 11:28:44 +01:00
Jaime Marquínez Ferrándiz
d214fdb8fe [brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead 2013-11-24 11:02:34 +01:00
Philipp Hagemeister
138df537ff release 2013.11.24.1 2013-11-24 07:51:56 +01:00
Philipp Hagemeister
0c7c19d6bc [clipfish] Add extractor (Fixes #1760) 2013-11-24 07:51:44 +01:00
Philipp Hagemeister
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
Philipp Hagemeister
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
Philipp Hagemeister
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
Philipp Hagemeister
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
Philipp Hagemeister
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
Philipp Hagemeister
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
Philipp Hagemeister
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
Philipp Hagemeister
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
Philipp Hagemeister
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
Philipp Hagemeister
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
Philipp Hagemeister
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
Jaime Marquínez Ferrándiz
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
Philipp Hagemeister
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
Takuya Tsuchida
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
Itay Brandes
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access its log stream. It's pretty much limited to stdout and stderr
access it's log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
Philipp Hagemeister
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
Philipp Hagemeister
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
Philipp Hagemeister
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
Philipp Hagemeister
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
Jaime Marquínez Ferrándiz
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
Jaime Marquínez Ferrándiz
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
Jaime Marquínez Ferrándiz
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
Philipp Hagemeister
50123be421 release 2013.11.22.1 2013-11-22 20:23:55 +01:00
Philipp Hagemeister
3f8ced5144 Merge remote-tracking branch 'jaimeMF/yt-playlists' 2013-11-22 20:11:54 +01:00
Philipp Hagemeister
00ea0f11eb Print full title in --get-title output (#1806) 2013-11-22 20:00:35 +01:00
Philipp Hagemeister
dca0872056 Move the opener to the YoutubeDL object.
This is the first step towards being able to just import youtube_dl and start using it.
Apart from removing global state, this would fix problems like #1805.
2013-11-22 19:57:52 +01:00
Philipp Hagemeister
0b63aed8df [update] do not assign to unused variables 2013-11-22 19:15:36 +01:00
Philipp Hagemeister
15c3adbb16 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 19:08:33 +01:00
Philipp Hagemeister
f143a42fe6 [bandcamp] Skip album test 2013-11-22 19:08:25 +01:00
Jaime Marquínez Ferrándiz
241650c7ff [vimeo] Fix the extraction of vimeo pro and player.vimeo.com videos 2013-11-22 18:20:31 +01:00
Philipp Hagemeister
bfe7439a20 release 2013.11.22 2013-11-22 17:46:26 +01:00
Philipp Hagemeister
cffa6aa107 [bandcamp] Support trackinfo-style songs (Fixes #1270) 2013-11-22 17:44:55 +01:00
Philipp Hagemeister
02e4ebbbad [streamcloud] Add IE (Fixes #1801) 2013-11-22 17:19:22 +01:00
Philipp Hagemeister
ab009f59ef [toutv] Fix a typo 2013-11-22 17:18:03 +01:00
Jaime Marquínez Ferrándiz
0980426559 [bandcamp] add support for albums (reported in #1270) 2013-11-22 16:05:14 +01:00
Philipp Hagemeister
b1c9c66936 Remove unnecessary slash in setup.py (Fixes #1778) 2013-11-21 23:26:28 +01:00
Jaime Marquínez Ferrándiz
a6a173c2fd utils.shell_quote: Convert the args to unicode strings
The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.
2013-11-21 14:09:28 +01:00
Philipp Hagemeister
2bb683c201 release 2013.11.21 2013-11-21 13:59:33 +01:00
Jaime Marquínez Ferrándiz
64bb5187f5 [soundcloud] Retrieve the file url using the client_id for the iPhone (fixes #1798)
The desktop's client_id always gives the rtmp url, but with the iPhone one it returns the http url if it's available.
2013-11-21 13:16:19 +01:00
Philipp Hagemeister
9e4f50a8ae [sztv] skip test, site is undergoing mid-term maintenance 2013-11-20 09:59:03 +01:00
Philipp Hagemeister
0190eecc00 [nhl] Make NHLVideocenter IE_DESC fit with other descriptions 2013-11-20 09:45:29 +01:00
Philipp Hagemeister
ca872a4c0b [spankwire] Fix description search 2013-11-20 09:23:53 +01:00
Philipp Hagemeister
f2e87ef4fa [anitube] Skip test (on travis) 2013-11-20 07:46:44 +01:00
Philipp Hagemeister
0ad97bbc05 [spankwire] fix check for description 2013-11-20 07:45:32 +01:00
Philipp Hagemeister
c4864091a1 [videopremium] Support new crazy redirect scheme 2013-11-20 07:43:21 +01:00
Philipp Hagemeister
9a98a466b3 [toutv] really skip test 2013-11-20 07:37:22 +01:00
Philipp Hagemeister
f99e0f1ed6 Adapt age restriction tests to new .info.json filenames 2013-11-20 07:37:07 +01:00
Philipp Hagemeister
d323bcb152 release 2013.11.20 2013-11-20 07:25:17 +01:00
Philipp Hagemeister
da6a795fdb [escapist] Fix title search 2013-11-20 07:23:23 +01:00
Philipp Hagemeister
c5edcde21f [escapist] upper-case URL 2013-11-20 06:56:59 +01:00
Philipp Hagemeister
15ff3c831e [escapist] Fix syntax error 2013-11-20 06:55:07 +01:00
Philipp Hagemeister
100959a6d9 [escapist] Add support for HD format (Closes #1755) 2013-11-20 06:52:08 +01:00
Philipp Hagemeister
0a120f74b2 Credit @diffycat for anitube 2013-11-20 06:36:00 +01:00
Philipp Hagemeister
8f05351984 [anitube] Minor fixes (#1776) 2013-11-20 06:35:02 +01:00
Philipp Hagemeister
4eb92208a3 Adapt test to changed .info.json name 2013-11-20 06:34:48 +01:00
Philipp Hagemeister
71791f414c Merge remote-tracking branch 'diffycat/master' 2013-11-20 06:28:13 +01:00
Philipp Hagemeister
f3682997d7 Clean up unused imports and other minor mistakes 2013-11-20 06:27:48 +01:00
Philipp Hagemeister
cc13cc0251 [teamcoco] Correct error 2013-11-20 06:25:33 +01:00
Philipp Hagemeister
86bd5f2ca9 Merge remote-tracking branch 'dz0ny/patch-1' 2013-11-20 06:21:05 +01:00
Philipp Hagemeister
8694c60000 import json for --dump-json 2013-11-20 06:18:24 +01:00
Mohamedh Fazal
9d1538182f Add an option to dump json information 2013-11-20 06:14:57 +01:00
Philipp Hagemeister
5904088811 Add support for tou.tv (Fixes #1792) 2013-11-20 06:13:19 +01:00
Jaime Marquínez Ferrándiz
69545c2aff [d8] inherit from CanalplusIE
it reuses the same extraction process
2013-11-19 20:44:20 +01:00
Jaime Marquínez Ferrándiz
495da337ae Merge pull request #1758 from migbac/master
Add support for d8.tv
2013-11-19 20:43:14 +01:00
Philipp Hagemeister
34b3afc7be release 2013.11.19 2013-11-19 12:41:01 +01:00
Philipp Hagemeister
00373a4c5d Merge pull request #1790 from rg3/console-title
Correctly write and restore the console title on the stack (fixes #1782)
2013-11-18 07:50:10 -08:00
Philipp Hagemeister
cb7dfeeac4 [youtube] only allow domain name to be upper-case (#1786) 2013-11-18 16:42:35 +01:00
Jaime Marquínez Ferrándiz
efd6c574a2 Correctly write and restore the console title on the stack (fixes #1782) 2013-11-18 16:35:41 +01:00
Philipp Hagemeister
4113e6ab56 [auengine] Do not return unnecessary ext 2013-11-18 14:36:01 +01:00
Philipp Hagemeister
9a942a4671 release 2013.11.18.1 2013-11-18 13:56:53 +01:00
Philipp Hagemeister
9906d397a0 [auengine] Simplify 2013-11-18 13:56:45 +01:00
Philipp Hagemeister
ae8f787141 Remove iPhone from user agent. This breaks a lot of extractors
In the future, it might be worth investigating whether we get better content when we claim to be an iPhone.
2013-11-18 13:52:26 +01:00
Philipp Hagemeister
a81b4d5c8f release 2013.11.18 2013-11-18 13:30:43 +01:00
Philipp Hagemeister
887c6acdf2 Support multiple embedded YouTube URLs (Fixes #1787) 2013-11-18 13:28:26 +01:00
Philipp Hagemeister
83aa529330 Support protocol-independent URLs (#1787) 2013-11-18 13:18:17 +01:00
Philipp Hagemeister
96b31b6533 Add iPhone to UA (#1746) 2013-11-18 13:05:58 +01:00
Philipp Hagemeister
fccd377198 Support embed-only videos (Fixes #1746) 2013-11-18 13:05:18 +01:00
rzhxeo
2b35c9ef74 Merge branch 'master' into rtmpdump
Conflicts:
	youtube_dl/FileDownloader.py

Merge
2013-11-18 00:27:06 +01:00
Philipp Hagemeister
73c566695f release 2013.11.17 2013-11-17 22:14:13 +01:00
Philipp Hagemeister
63b7b7224a [MTVIE] Try with RTMP URL if download fails
This fixes youtube-dl http://www.southpark.de/clips/155251/cartman-vs-the-dog-whisperer
2013-11-17 22:11:40 +01:00
Philipp Hagemeister
ce80c8b8ee Merge pull request #1784 from rzhxeo/southpark
Add support for southpark.de
2013-11-17 12:15:13 -08:00
Philipp Hagemeister
749febf4d1 Allow --console-title when --quiet is given (Fixes #1783) 2013-11-17 21:12:50 +01:00
Philipp Hagemeister
bdde425cbe Save and restore console title (Fixes #1782) 2013-11-17 21:10:11 +01:00
rzhxeo
746f491f82 Add support for southpark.de 2013-11-17 17:54:47 +01:00
rzhxeo
1672647ade [SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 17:43:58 +01:00
rzhxeo
90b6bbc38c [SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 17:42:24 +01:00
Philipp Hagemeister
ce02ed60f2 Remove * imports 2013-11-17 16:47:52 +01:00
Philipp Hagemeister
1e5b9a95fd Move console_title to YoutubeDL 2013-11-17 11:39:52 +01:00
Philipp Hagemeister
1d699755e0 [youtube] Add view_count (Fixes #1781) 2013-11-17 11:06:16 +01:00
Philipp Hagemeister
ddf49c6344 [arte] remove two typos 2013-11-17 11:05:49 +01:00
Anton Larionov
ba3881dffd Add support for anitube.se (#1417) 2013-11-16 18:26:34 +04:00
Philipp Hagemeister
d1c252048b [redtube] Do not test md5, seems to vary 2013-11-16 10:30:09 +01:00
Philipp Hagemeister
eab2724138 [gamekings] Do not test md5 sum, precise file changes regularly 2013-11-16 02:32:23 +01:00
Philipp Hagemeister
21ea3e06c9 [gamekings] remove unnecessary import 2013-11-16 02:31:02 +01:00
Philipp Hagemeister
52d703d3d1 [tvp] Skip tests 2013-11-16 02:09:30 +01:00
Philipp Hagemeister
ce152341a1 [bambuser] Do not test for MD5, seems to be flaky 2013-11-16 01:59:28 +01:00
Philipp Hagemeister
f058e34011 [dailymotion] Fix playlists 2013-11-16 01:56:23 +01:00
Philipp Hagemeister
b5349e8721 Fix indentation of (best) and (worst) in --list-formats 2013-11-16 01:39:45 +01:00
Philipp Hagemeister
7150858d49 [spiegel] Implement format selection 2013-11-16 01:33:12 +01:00
Philipp Hagemeister
91c7271aab Add automatic generation of format note based on bitrate and codecs 2013-11-16 01:08:43 +01:00
Philipp Hagemeister
aa13b2dffd release 2013.11.15.1 2013-11-15 14:35:00 +01:00
Philipp Hagemeister
fc2ef392be [ted] Fix playlists (Fixes #1770) 2013-11-15 14:33:51 +01:00
Philipp Hagemeister
463a908705 [ted] simplify 2013-11-15 14:06:38 +01:00
Jaime Marquínez Ferrándiz
d24ffe1cfa [rtlnow] Remove the test for nitro
The videos expire.
2013-11-15 12:57:59 +01:00
Jaime Marquínez Ferrándiz
78fb87b283 Don't accept '>' inside the content attribute in OpenGraph regexes 2013-11-15 12:54:13 +01:00
Jaime Marquínez Ferrándiz
ab2d524780 Improve the OpenGraph regex
* Do not accept '>' between the property and content attributes.
* Recognize the properties if the content attribute is before the property attribute using two regexes (fixes the extraction of the description for SlideshareIE).
2013-11-15 12:24:54 +01:00
Jaime Marquínez Ferrándiz
85d61685f1 [tvp] Update the title and the description of the test video 2013-11-15 12:10:22 +01:00
Jaime Marquínez Ferrándiz
b9643eed7c [youtube:channel] Fix the extraction of autogenerated channels
The ajax pages are empty, now it looks directly in the channel's /videos page
2013-11-15 11:51:45 +01:00
Jaime Marquínez Ferrándiz
feee2ecfa9 Pass the 'download' argument to 'process_video_result' (fixes #1769) 2013-11-15 11:04:26 +01:00
Janez Troha
08bc37cdd0 Update test_write_info_json.py 2013-11-13 18:55:49 +01:00
Janez Troha
9771cceb2c Fix filename extension leaking to json filename
Makes writeinfojson behave exactly like writethumbnail in the case where the filename contains the media file extension.

Case:

video.mp4 converted to music.mp3 would yield music.mp4.info.json instead of music.mp3.info.json or music.info.json
2013-11-13 18:34:03 +01:00
Jaime Marquínez Ferrándiz
880e1c529d [youtube:playlist] Login into youtube if requested (fixes #1757)
Allows to download private playlists
2013-11-13 16:39:11 +01:00
Jaime Marquínez Ferrándiz
dcbb45803f [youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
2013-11-13 16:26:50 +01:00
migbac
0bd59f3723 Add support for d8.tv 2013-11-12 23:32:03 +01:00
rzhxeo
4894fe8c5b Report download progress of rtmpdump 2013-11-09 11:14:40 +01:00
Rogério Brito
d5a9bb4ea9 extractor: youtube: Swap video dimensions to match standard practice.
While working on this, I thought about simplifying things like changing
480x854 to 480p, and that seemed like a good option, until I realized that
people (me included) usually link the concept of some number followed by a p
with the video being 16:9.

So, we would be losing some information and, as we all know,
[explicit is better than implicit][*].

[*]: http://www.python.org/dev/peps/pep-0020/

This closes #1446.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-19 14:04:44 -03:00
88 changed files with 1996 additions and 717 deletions

View File

@@ -56,7 +56,8 @@ which means you can modify it, redistribute it or use it however you like.
--no-playlist download only the currently playing video --no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age --age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not present in the archive --download-archive FILE Download only videos not present in the archive
file. Record all downloaded videos in it. file. Record the IDs of all downloaded videos in
it.
## Download Options: ## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
@@ -123,17 +124,18 @@ which means you can modify it, redistribute it or use it however you like.
--get-description simulate, quiet but print video description --get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename --get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format --get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information
--newline output progress bar as new lines --newline output progress bar as new lines
--no-progress do not print progress bar --no-progress do not print progress bar
--console-title display progress in console titlebar --console-title display progress in console titlebar
-v, --verbose print various debugging information -v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems(very --dump-intermediate-pages print downloaded pages to debug problems(very
verbose) verbose)
--write-pages Write downloaded pages to files in the current --write-pages Write downloaded intermediary pages to files in
directory the current directory to debug problems
## Video Format Options: ## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of -f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4" preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported and "-f flv" are also supported
--all-formats download all available video formats --all-formats download all available video formats

View File

@@ -1,10 +1,21 @@
__youtube_dl() __youtube_dl()
{ {
local cur prev opts local cur prev opts fileopts diropts keywords
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="{{flags}}" opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater" keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies"
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
if [[ ${cur} =~ : ]]; then if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )

View File

@@ -48,7 +48,7 @@ else:
'data_files': [ # Installing system-wide would require sudo... 'data_files': [ # Installing system-wide would require sudo...
('etc/bash_completion.d', ['youtube-dl.bash-completion']), ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']), ('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1']) ('share/man/man1', ['youtube-dl.1'])
] ]
} }
if setuptools_available: if setuptools_available:

View File

@@ -12,10 +12,6 @@ from youtube_dl import YoutubeDL
from youtube_dl.utils import preferredencoding from youtube_dl.utils import preferredencoding
def global_setup():
youtube_dl._setup_opener(timeout=10)
def get_params(override=None): def get_params(override=None):
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"parameters.json") "parameters.json")

View File

@@ -39,5 +39,6 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false "listssubtitles": false,
"socket_timeout": 20
} }

View File

@@ -6,8 +6,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup, try_rm from test.helper import try_rm
global_setup()
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
@@ -24,7 +23,7 @@ def _download_restricted(url, filename, age):
} }
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
ydl.add_default_info_extractors() ydl.add_default_info_extractors()
json_filename = filename + '.info.json' json_filename = os.path.splitext(filename)[0] + '.info.json'
try_rm(json_filename) try_rm(json_filename)
ydl.download([url]) ydl.download([url])
res = os.path.exists(json_filename) res = os.path.exists(json_filename)

View File

@@ -100,10 +100,15 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self): def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral']) self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':tds', ['ComedyCentral']) self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':colbertreport', ['ComedyCentral']) self.assertMatch(':tds', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentral']) self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -9,12 +9,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
get_params, get_params,
get_testcases, get_testcases,
global_setup,
try_rm, try_rm,
md5, md5,
report_warning report_warning
) )
global_setup()
import hashlib import hashlib
@@ -103,7 +101,7 @@ def generator(test_case):
tc_filename = get_tc_filename(tc) tc_filename = get_tc_filename(tc)
try_rm(tc_filename) try_rm(tc_filename)
try_rm(tc_filename + '.part') try_rm(tc_filename + '.part')
try_rm(tc_filename + '.info.json') try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files() try_rm_tcs_files()
try: try:
try_num = 1 try_num = 1
@@ -130,11 +128,12 @@ def generator(test_case):
if not test_case.get('params', {}).get('skip_download', False): if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc_filename in finished_hook_called) self.assertTrue(tc_filename in finished_hook_called)
self.assertTrue(os.path.exists(tc_filename + '.info.json')) info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(os.path.exists(info_json_fn))
if 'md5' in tc: if 'md5' in tc:
md5_for_file = _file_md5(tc_filename) md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5']) self.assertEqual(md5_for_file, tc['md5'])
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof: with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof) info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items(): for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'): if isinstance(expected, compat_str) and expected.startswith('md5:'):

View File

@@ -8,20 +8,21 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup from test.helper import FakeYDL
global_setup()
from youtube_dl.extractor import ( from youtube_dl.extractor import (
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionUserIE, DailymotionUserIE,
VimeoChannelIE, VimeoChannelIE,
VimeoUserIE,
UstreamChannelIE, UstreamChannelIE,
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
LivestreamIE, LivestreamIE,
NHLVideocenterIE, NHLVideocenterIE,
BambuserChannelIE, BambuserChannelIE,
BandcampAlbumIE
) )
@@ -54,6 +55,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Vimeo Tributes') self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24) self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
dl = FakeYDL()
ie = VimeoUserIE(dl)
result = ie.extract('http://vimeo.com/nkistudio/videos')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nki')
self.assertTrue(len(result['entries']) > 65)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = UstreamChannelIE(dl) ie = UstreamChannelIE(dl)
@@ -101,7 +110,15 @@ class TestPlaylists(unittest.TestCase):
result = ie.extract('http://bambuser.com/channel/pixelversity') result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'pixelversity') self.assertEqual(result['title'], u'pixelversity')
self.assertTrue(len(result['entries']) >= 66) self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -6,8 +6,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5 from test.helper import FakeYDL, md5
global_setup()
from youtube_dl.extractor import ( from youtube_dl.extractor import (
@@ -73,7 +72,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt' self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7') self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
def test_youtube_list_subtitles(self): def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions') self.DL.expect_warning(u'Video doesn\'t have automatic captions')

View File

@@ -24,6 +24,8 @@ from youtube_dl.utils import (
xpath_with_ns, xpath_with_ns,
smuggle_url, smuggle_url,
unsmuggle_url, unsmuggle_url,
shell_quote,
encodeFilename,
) )
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
@@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url) self.assertEqual(res_url, url)
self.assertEqual(res_data, None) self.assertEqual(res_data, None)
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -7,8 +7,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup, try_rm from test.helper import get_params, try_rm
global_setup()
import io import io

View File

@@ -7,8 +7,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup from test.helper import get_params
global_setup()
import io import io
@@ -31,7 +30,7 @@ params = get_params({
TEST_ID = 'BaW_jenozKc' TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.mp4.info.json' INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description' DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐

View File

@@ -6,8 +6,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup from test.helper import FakeYDL
global_setup()
from youtube_dl.extractor import ( from youtube_dl.extractor import (
@@ -27,7 +26,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist(self): def test_youtube_playlist(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL') self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +43,13 @@ class TestYoutubeLists(unittest.TestCase):
def test_issue_673(self): def test_issue_673(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('PLBB231211A4F62143')[0] result = ie.extract('PLBB231211A4F62143')
self.assertTrue(len(result['entries']) > 25) self.assertTrue(len(result['entries']) > 25)
def test_youtube_playlist_long(self): def test_youtube_playlist_long(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 799) self.assertTrue(len(result['entries']) >= 799)
@@ -58,7 +57,7 @@ class TestYoutubeLists(unittest.TestCase):
#651 #651
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +65,7 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist_empty(self): def test_youtube_playlist_empty(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(len(result['entries']), 0) self.assertEqual(len(result['entries']), 0)
@@ -74,7 +73,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course # TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries'] entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25) self.assertEqual(len(entries), 25)
@@ -84,22 +83,22 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeChannelIE(dl) ie = YoutubeChannelIE(dl)
#test paginated channel #test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90) self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel #test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18) self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self): def test_youtube_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeUserIE(dl) ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320) self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self): def test_youtube_safe_search(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
self.assertEqual(len(result['entries']), 2) self.assertEqual(len(result['entries']), 2)
def test_youtube_show(self): def test_youtube_show(self):
@@ -108,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters') result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3) self.assertTrue(len(result) >= 3)
def test_youtube_mix(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
entries = result['entries']
self.assertTrue(len(entries) >= 20)
original_video = entries[0]
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -6,9 +6,6 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup
global_setup()
import io import io
import re import re

View File

@@ -1,19 +1,16 @@
import math
import os import os
import re import re
import subprocess import subprocess
import sys import sys
import time import time
if os.name == 'nt':
import ctypes
from .utils import ( from .utils import (
compat_urllib_error, compat_urllib_error,
compat_urllib_request, compat_urllib_request,
ContentTooShortError, ContentTooShortError,
determine_ext, determine_ext,
encodeFilename, encodeFilename,
format_bytes,
sanitize_open, sanitize_open,
timeconvert, timeconvert,
) )
@@ -56,20 +53,6 @@ class FileDownloader(object):
self._progress_hooks = [] self._progress_hooks = []
self.params = params self.params = params
@staticmethod
def format_bytes(bytes):
if bytes is None:
return 'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod @staticmethod
def format_seconds(seconds): def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60) (mins, secs) = divmod(seconds, 60)
@@ -120,7 +103,7 @@ class FileDownloader(object):
def format_speed(speed): def format_speed(speed):
if speed is None: if speed is None:
return '%10s' % '---b/s' return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed)) return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod @staticmethod
def best_block_size(elapsed_time, bytes): def best_block_size(elapsed_time, bytes):
@@ -151,16 +134,8 @@ class FileDownloader(object):
def to_stderr(self, message): def to_stderr(self, message):
self.ydl.to_screen(message) self.ydl.to_screen(message)
def to_cons_title(self, message): def to_console_title(self, message):
"""Set console/terminal window title to message.""" self.ydl.to_console_title(message)
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def trouble(self, *args, **kargs): def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs) self.ydl.trouble(*args, **kargs)
@@ -249,7 +224,7 @@ class FileDownloader(object):
else: else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len): def report_resuming_byte(self, resume_len):
@@ -281,6 +256,61 @@ class FileDownloader(object):
(clear_line, data_len_str, self.format_seconds(tot_time))) (clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
test = self.params.get('test', False) test = self.params.get('test', False)
@@ -291,12 +321,11 @@ class FileDownloader(object):
except (OSError, IOError): except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run') self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False return False
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
# Download using rtmpdump. rtmpdump returns exit code 2 when # Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be # the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK. # possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename] basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None: if player_url is not None:
basic_args += ['--swfVfy', player_url] basic_args += ['--swfVfy', player_url]
if page_url is not None: if page_url is not None:
@@ -310,30 +339,48 @@ class FileDownloader(object):
if live: if live:
basic_args += ['--live'] basic_args += ['--live']
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False): if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try: try:
import pipes import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, args)) shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError: except ImportError:
shell_quote = repr shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = subprocess.call(args)
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test: while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1: if prevsize == cursize and retval == 1:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024: if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0 retval = 0
break break
if retval == 0 or (test and retval == 2): if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize) self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@@ -536,7 +583,7 @@ class FileDownloader(object):
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False return False
data_len_str = self.format_bytes(data_len) data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024) block_size = self.params.get('buffersize', 1024)
start = time.time() start = time.time()

View File

@@ -5,17 +5,53 @@ from __future__ import absolute_import
import errno import errno
import io import io
import json
import os import os
import platform
import re import re
import shutil import shutil
import subprocess
import socket import socket
import sys import sys
import time import time
import traceback import traceback
from .utils import * if os.name == 'nt':
import ctypes
from .utils import (
compat_cookiejar,
compat_http_client,
compat_print,
compat_str,
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
date_from_str,
DateRange,
determine_ext,
DownloadError,
encodeFilename,
ExtractorError,
format_bytes,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PostProcessingError,
platform_name,
preferredencoding,
SameFileError,
sanitize_filename,
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
write_json_file,
write_string,
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader from .FileDownloader import FileDownloader
from .version import __version__
class YoutubeDL(object): class YoutubeDL(object):
@@ -57,6 +93,7 @@ class YoutubeDL(object):
forcethumbnail: Force printing thumbnail URL. forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description. forcedescription: Force printing description.
forcefilename: Force printing final filename. forcefilename: Force printing final filename.
forcejson: Force printing info_dict as JSON.
simulate: Do not download the video files. simulate: Do not download the video files.
format: Video format code. format: Video format code.
format_limit: Highest quality format to try. format_limit: Highest quality format to try.
@@ -68,6 +105,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at. playlistend: Playlist item to end at.
matchtitle: Download only matching titles. matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles. rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout. logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file writeinfojson: Write the video description to a .info.json file
@@ -88,9 +126,13 @@ class YoutubeDL(object):
noplaylist: Download single video instead of a playlist if in doubt. noplaylist: Download single video instead of a playlist if in doubt.
age_limit: An integer representing the user's age in years. age_limit: An integer representing the user's age in years.
Unsuitable videos for the given age are skipped. Unsuitable videos for the given age are skipped.
downloadarchive: File name of a file where all downloads are recorded. download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded Videos already present in the file are not downloaded
again. again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:
@@ -105,7 +147,7 @@ class YoutubeDL(object):
_num_downloads = None _num_downloads = None
_screen_file = None _screen_file = None
def __init__(self, params): def __init__(self, params=None):
"""Create a FileDownloader object with the given options.""" """Create a FileDownloader object with the given options."""
self._ies = [] self._ies = []
self._ies_instances = {} self._ies_instances = {}
@@ -114,6 +156,7 @@ class YoutubeDL(object):
self._download_retcode = 0 self._download_retcode = 0
self._num_downloads = 0 self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
self.params = {} if params is None else params
if (sys.version_info >= (3,) and sys.platform != 'win32' and if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
@@ -123,14 +166,15 @@ class YoutubeDL(object):
u'Assuming --restrict-filenames since file system encoding ' u'Assuming --restrict-filenames since file system encoding '
u'cannot encode all charactes. ' u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.') u'Set the LC_ALL environment variable to fix this.')
params['restrictfilenames'] = True self.params['restrictfilenames'] = True
self.params = params
self.fd = FileDownloader(self, self.params) self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params['outtmpl']: if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
self._setup_opener()
def add_info_extractor(self, ie): def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list.""" """Add an InfoExtractor object to the end of the list."""
self._ies.append(ie) self._ies.append(ie)
@@ -163,7 +207,9 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False): def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode.""" """Print message to stdout if not in quiet mode."""
if not self.params.get('quiet', False): if self.params.get('logger'):
self.params['logger'].debug(message)
elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol] terminator = [u'\n', u''][skip_eol]
output = message + terminator output = message + terminator
write_string(output, self._screen_file) write_string(output, self._screen_file)
@@ -171,14 +217,47 @@ class YoutubeDL(object):
def to_stderr(self, message): def to_stderr(self, message):
"""Print message to stderr.""" """Print message to stderr."""
assert type(message) == type(u'') assert type(message) == type(u'')
output = message + u'\n' if self.params.get('logger'):
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr self.params['logger'].error(message)
output = output.encode(preferredencoding()) else:
sys.stderr.write(output) output = message + u'\n'
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding())
sys.stderr.write(output)
def fixed_template(self): def to_console_title(self, message):
"""Checks if the output template is fixed.""" if not self.params.get('consoletitle', False):
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Save the title on stack
write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
if 'TERM' in os.environ:
# Restore the title from stack
write_string(u'\033[23;0t', self._screen_file)
def __enter__(self):
self.save_console_title()
return self
def __exit__(self, *args):
self.restore_console_title()
if self.params.get('cookiefile') is not None:
self.cookiejar.save()
def trouble(self, message=None, tb=None): def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears. """Determine action to take when a download problem appears.
@@ -254,7 +333,7 @@ class YoutubeDL(object):
"""Report file has already been fully downloaded.""" """Report file has already been fully downloaded."""
try: try:
self.to_screen(u'[download] %s has already been downloaded' % file_name) self.to_screen(u'[download] %s has already been downloaded' % file_name)
except (UnicodeEncodeError) as err: except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded') self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self): def increment_downloads(self):
@@ -295,15 +374,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict): def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """ """ Returns None iff the file should be downloaded """
title = info_dict['title'] if 'title' in info_dict:
matchtitle = self.params.get('matchtitle', False) # This can happen when we're just evaluating the playlist
if matchtitle: title = info_dict['title']
if not re.search(matchtitle, title, re.IGNORECASE): matchtitle = self.params.get('matchtitle', False)
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' if matchtitle:
rejecttitle = self.params.get('rejecttitle', False) if not re.search(matchtitle, title, re.IGNORECASE):
if rejecttitle: return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
if re.search(rejecttitle, title, re.IGNORECASE): rejecttitle = self.params.get('rejecttitle', False)
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' if rejecttitle:
if re.search(rejecttitle, title, re.IGNORECASE):
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None) date = info_dict.get('upload_date', None)
if date is not None: if date is not None:
dateRange = self.params.get('daterange', DateRange()) dateRange = self.params.get('daterange', DateRange())
@@ -314,8 +395,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0): if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted' return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive' return (u'%s has already been recorded in archive'
% info_dict) % info_dict.get('title', info_dict.get('id', u'video')))
return None return None
@staticmethod @staticmethod
@@ -385,7 +466,7 @@ class YoutubeDL(object):
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
if result_type == 'video': if result_type == 'video':
self.add_extra_info(ie_result, extra_info) self.add_extra_info(ie_result, extra_info)
return self.process_video_result(ie_result) return self.process_video_result(ie_result, download=download)
elif result_type == 'url': elif result_type == 'url':
# We have to add extra_info to the results because it may be # We have to add extra_info to the results because it may be
# contained in a playlist # contained in a playlist
@@ -394,7 +475,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'), ie_key=ie_result.get('ie_key'),
extra_info=extra_info) extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'playlist':
self.add_extra_info(ie_result, extra_info)
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -424,6 +505,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
reason = self._match_entry(entry)
if reason is not None:
self.to_screen(u'[download] ' + reason)
continue
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,
extra_info=extra) extra_info=extra)
@@ -579,7 +666,7 @@ class YoutubeDL(object):
# Forced printings # Forced printings
if self.params.get('forcetitle', False): if self.params.get('forcetitle', False):
compat_print(info_dict['title']) compat_print(info_dict['fulltitle'])
if self.params.get('forceid', False): if self.params.get('forceid', False):
compat_print(info_dict['id']) compat_print(info_dict['id'])
if self.params.get('forceurl', False): if self.params.get('forceurl', False):
@@ -593,6 +680,8 @@ class YoutubeDL(object):
compat_print(filename) compat_print(filename)
if self.params.get('forceformat', False): if self.params.get('forceformat', False):
compat_print(info_dict['format']) compat_print(info_dict['format'])
if self.params.get('forcejson', False):
compat_print(json.dumps(info_dict))
# Do nothing else if in simulate mode # Do nothing else if in simulate mode
if self.params.get('simulate', False): if self.params.get('simulate', False):
@@ -655,7 +744,7 @@ class YoutubeDL(object):
return return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json' infofn = os.path.splitext(filename)[0] + u'.info.json'
self.report_writeinfojson(infofn) self.report_writeinfojson(infofn)
try: try:
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle']) json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
@@ -706,13 +795,15 @@ class YoutubeDL(object):
def download(self, url_list): def download(self, url_list):
"""Download a given list of URLs.""" """Download a given list of URLs."""
if len(url_list) > 1 and self.fixed_template(): if (len(url_list) > 1 and
'%' not in self.params['outtmpl']
and self.params.get('max_downloads') != 1):
raise SameFileError(self.params['outtmpl']) raise SameFileError(self.params['outtmpl'])
for url in url_list: for url in url_list:
try: try:
#It also downloads the videos #It also downloads the videos
videos = self.extract_info(url) self.extract_info(url)
except UnavailableVideoError: except UnavailableVideoError:
self.report_error(u'unable to download video') self.report_error(u'unable to download video')
except MaxDownloadsReached: except MaxDownloadsReached:
@@ -744,11 +835,26 @@ class YoutubeDL(object):
except (IOError, OSError): except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file') self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
# Future-proof against any change in case
# and backwards compatibility with prior versions
extractor = info_dict.get('extractor_key')
if extractor is None:
if 'id' in info_dict:
extractor = info_dict.get('ie_key') # key in a playlist
if extractor is None:
return None # Incomplete video information
return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict): def in_download_archive(self, info_dict):
fn = self.params.get('download_archive') fn = self.params.get('download_archive')
if fn is None: if fn is None:
return False return False
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
vid_id = self._make_archive_id(info_dict)
if vid_id is None:
return False # Incomplete video information
try: try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file: with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file: for line in archive_file:
@@ -763,12 +869,15 @@ class YoutubeDL(object):
fn = self.params.get('download_archive') fn = self.params.get('download_archive')
if fn is None: if fn is None:
return return
vid_id = info_dict['extractor'] + u' ' + info_dict['id'] vid_id = self._make_archive_id(info_dict)
assert vid_id
with locked_file(fn, 'a', encoding='utf-8') as archive_file: with locked_file(fn, 'a', encoding='utf-8') as archive_file:
archive_file.write(vid_id + u'\n') archive_file.write(vid_id + u'\n')
@staticmethod @staticmethod
def format_resolution(format, default='unknown'): def format_resolution(format, default='unknown'):
if format.get('vcodec') == 'none':
return 'audio only'
if format.get('_resolution') is not None: if format.get('_resolution') is not None:
return format['_resolution'] return format['_resolution']
if format.get('height') is not None: if format.get('height') is not None:
@@ -781,23 +890,124 @@ class YoutubeDL(object):
return res return res
def list_formats(self, info_dict): def list_formats(self, info_dict):
def line(format): def format_note(fdict):
return (u'%-20s%-10s%-12s%s' % ( res = u''
if fdict.get('format_note') is not None:
res += fdict['format_note'] + u' '
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
res += u'%-5s' % fdict['vcodec']
elif fdict.get('vbr') is not None:
res += u'video'
if fdict.get('vbr') is not None:
res += u'@%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += u', '
res += u'%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += u', '
res += 'audio'
if fdict.get('abr') is not None:
res += u'@%3dk' % fdict['abr']
if fdict.get('filesize') is not None:
if res:
res += u', '
res += format_bytes(fdict['filesize'])
return res
def line(format, idlen=20):
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
format['format_id'], format['format_id'],
format['ext'], format['ext'],
self.format_resolution(format), self.format_resolution(format),
format.get('format_note', ''), format_note(format),
) ))
)
formats = info_dict.get('formats', [info_dict]) formats = info_dict.get('formats', [info_dict])
formats_s = list(map(line, formats)) idlen = max(len(u'format code'),
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1: if len(formats) > 1:
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
header_line = line({ header_line = line({
'format_id': u'format code', 'ext': u'extension', 'format_id': u'format code', 'ext': u'extension',
'_resolution': u'resolution', 'format_note': u'note'}) '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s))) (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """Open req through this instance's opener and return what it gives back."""
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write debugging information about the running program.

    Emits the youtube-dl version, the short git HEAD hash when it can be
    determined, the Python version and platform name, and the proxy map
    gathered from the opener's handlers. Nothing is written unless the
    'verbose' parameter is truthy.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git probe is best effort (git may be missing, or this may not
        # be a checkout). Catch Exception rather than everything so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() only exists on Python 2
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener from self.params and install it.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'nocheckcertificate' parameters. Stores the cookie jar in
    self.cookiejar and the opener in self._opener, installs the opener
    globally and sets the default socket timeout (600 when no
    'socket_timeout' was given).
    """
    socket_timeout = self.params.get('socket_timeout')
    timeout = float(socket_timeout) if socket_timeout is not None else 600

    cookie_path = self.params.get('cookiefile')
    requested_proxy = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load cookies when the file is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if requested_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif requested_proxy == '':
        # An empty proxy option means: use no proxy at all
        proxies = {}
    else:
        proxies = {'http': requested_proxy, 'https': requested_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    built_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []
    self._opener = built_opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(built_opener)
    socket.setdefaulttimeout(timeout)

View File

@@ -34,50 +34,42 @@ __authors__ = (
'Andras Elso', 'Andras Elso',
'Jelle van der Waa', 'Jelle van der Waa',
'Marcin Cieślak', 'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
import codecs import codecs
import collections
import getpass import getpass
import optparse import optparse
import os import os
import random import random
import re import re
import shlex import shlex
import socket
import subprocess import subprocess
import sys import sys
import traceback
import platform
from .utils import ( from .utils import (
compat_cookiejar,
compat_print, compat_print,
compat_str,
compat_urllib_request,
DateRange, DateRange,
decodeOption, decodeOption,
determine_ext, determine_ext,
DownloadError, DownloadError,
get_cachedir, get_cachedir,
make_HTTPS_handler,
MaxDownloadsReached, MaxDownloadsReached,
platform_name,
preferredencoding, preferredencoding,
SameFileError, SameFileError,
std_headers, std_headers,
write_string, write_string,
YoutubeDLHandler,
) )
from .update import update_self from .update import update_self
from .version import __version__
from .FileDownloader import ( from .FileDownloader import (
FileDownloader, FileDownloader,
) )
from .extractor import gen_extractors from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
from .PostProcessor import ( from .PostProcessor import (
FFmpegMetadataPP, FFmpegMetadataPP,
@@ -206,6 +198,9 @@ def parseOpts(overrideArguments=None):
general.add_option( general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir', '--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching') help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=optparse.SUPPRESS_HELP)
selection.add_option('--playlist-start', selection.add_option('--playlist-start',
@@ -214,7 +209,9 @@ def parseOpts(overrideArguments=None):
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) selection.add_option('--max-downloads', metavar='NUMBER',
dest='max_downloads', type=int, default=None,
help='Abort after downloading NUMBER files')
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
@@ -226,7 +223,7 @@ def parseOpts(overrideArguments=None):
default=None, type=int) default=None, type=int)
selection.add_option('--download-archive', metavar='FILE', selection.add_option('--download-archive', metavar='FILE',
dest='download_archive', dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.') help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
@@ -241,7 +238,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format', video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best', action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats', video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all') action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats', video_format.add_option('--prefer-free-formats',
@@ -306,6 +303,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-format', verbosity.add_option('--get-format',
action='store_true', dest='getformat', action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False) help='simulate, quiet but print output format', default=False)
verbosity.add_option('-j', '--dump-json',
action='store_true', dest='dumpjson',
help='simulate, quiet but print JSON information', default=False)
verbosity.add_option('--newline', verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress', verbosity.add_option('--no-progress',
@@ -320,7 +320,7 @@ def parseOpts(overrideArguments=None):
help='print downloaded pages to debug problems(very verbose)') help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--write-pages', verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False, action='store_true', dest='write_pages', default=False,
help='Write downloaded pages to files in the current directory') help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option('--youtube-print-sig-code', verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False, action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
@@ -447,19 +447,6 @@ def _real_main(argv=None):
parser, opts, args = parseOpts(argv) parser, opts, args = parseOpts(argv)
# Open appropriate CookieJar
if opts.cookiefile is None:
jar = compat_cookiejar.CookieJar()
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent # Set user agent
if opts.user_agent is not None: if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent std_headers['User-Agent'] = opts.user_agent
@@ -491,8 +478,6 @@ def _real_main(argv=None):
all_urls = batchurls + args all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls] all_urls = [url.strip() for url in all_urls]
opener = _setup_opener(jar=jar, opts=opts)
extractors = gen_extractors() extractors = gen_extractors()
if opts.list_extractors: if opts.list_extractors:
@@ -547,7 +532,7 @@ def _real_main(argv=None):
if opts.retries is not None: if opts.retries is not None:
try: try:
opts.retries = int(opts.retries) opts.retries = int(opts.retries)
except (TypeError, ValueError) as err: except (TypeError, ValueError):
parser.error(u'invalid retry count specified') parser.error(u'invalid retry count specified')
if opts.buffersize is not None: if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
@@ -558,13 +543,13 @@ def _real_main(argv=None):
opts.playliststart = int(opts.playliststart) opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0: if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive') raise ValueError(u'Playlist start must be positive')
except (TypeError, ValueError) as err: except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified') parser.error(u'invalid playlist start number specified')
try: try:
opts.playlistend = int(opts.playlistend) opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start') raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err: except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified') parser.error(u'invalid playlist end number specified')
if opts.extractaudio: if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
@@ -603,13 +588,12 @@ def _real_main(argv=None):
u' file! Use "%%(ext)s" instead of %r' % u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u'')) determine_ext(outtmpl, u''))
# YoutubeDL ydl_opts = {
ydl = YoutubeDL({
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,
'videopassword': opts.videopassword, 'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'forceurl': opts.geturl, 'forceurl': opts.geturl,
'forcetitle': opts.gettitle, 'forcetitle': opts.gettitle,
'forceid': opts.getid, 'forceid': opts.getid,
@@ -617,8 +601,9 @@ def _real_main(argv=None):
'forcedescription': opts.getdescription, 'forcedescription': opts.getdescription,
'forcefilename': opts.getfilename, 'forcefilename': opts.getfilename,
'forceformat': opts.getformat, 'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate, 'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
'format': opts.format, 'format': opts.format,
'format_limit': opts.format_limit, 'format_limit': opts.format_limit,
'listformats': opts.listformats, 'listformats': opts.listformats,
@@ -667,102 +652,47 @@ def _real_main(argv=None):
'youtube_print_sig_code': opts.youtube_print_sig_code, 'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit, 'age_limit': opts.age_limit,
'download_archive': opts.download_archive, 'download_archive': opts.download_archive,
}) 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
}
with YoutubeDL(ydl_opts) as ydl:
ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try: try:
sp = subprocess.Popen( retcode = ydl.download(all_urls)
['git', 'rev-parse', '--short', 'HEAD'], except MaxDownloadsReached:
stdout=subprocess.PIPE, stderr=subprocess.PIPE, ydl.to_screen(u'--max-download limit reached, aborting.')
cwd=os.path.dirname(os.path.abspath(__file__))) retcode = 101
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
try:
jar.save()
except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode) sys.exit(retcode)
def _setup_opener(jar=None, opts=None, timeout=300):
    """Build a URL opener, install it globally and return it.

    jar is handed to the cookie processor. opts supplies the `proxy` and
    `no_check_certificate` attributes; when it is None a stand-in with
    proxy=None and no_check_certificate=False is used. timeout becomes
    the default socket timeout.
    """
    if opts is None:
        FakeOptions = collections.namedtuple(
            'FakeOptions', ['proxy', 'no_check_certificate'])
        opts = FakeOptions(proxy=None, no_check_certificate=False)

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)

    requested_proxy = opts.proxy
    if requested_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif requested_proxy == '':
        # An empty proxy option means: use no proxy at all
        proxies = {}
    else:
        proxies = {'http': requested_proxy, 'https': requested_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    https_handler = make_HTTPS_handler(opts)
    built_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    built_opener.addheaders = []

    compat_urllib_request.install_opener(built_opener)
    socket.setdefaulttimeout(timeout)
    return built_opener
def main(argv=None): def main(argv=None):
try: try:
_real_main(argv) _real_main(argv)

View File

@@ -1,5 +1,6 @@
from .appletrailers import AppleTrailersIE from .appletrailers import AppleTrailersIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .anitube import AnitubeIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .ard import ARDIE from .ard import ARDIE
from .arte import ( from .arte import (
@@ -10,7 +11,7 @@ from .arte import (
) )
from .auengine import AUEngineIE from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE from .bandcamp import BandcampIE, BandcampAlbumIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .breakcom import BreakIE from .breakcom import BreakIE
@@ -19,12 +20,15 @@ from .c56 import C56IE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cinemassacre import CinemassacreIE from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .clipsyndicate import ClipsyndicateIE
from .cnn import CNNIE from .cnn import CNNIE
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .cspan import CSpanIE from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import ( from .dailymotion import (
DailymotionIE, DailymotionIE,
DailymotionPlaylistIE, DailymotionPlaylistIE,
@@ -68,6 +72,7 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE
@@ -96,11 +101,13 @@ from .nba import NBAIE
from .nbc import NBCNewsIE from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .podomatic import PodomaticIE
from .pornhub import PornHubIE from .pornhub import PornHubIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
@@ -116,19 +123,24 @@ from .slashdot import SlashdotIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
)
from .space import SpaceIE from .space import SpaceIE
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE from .statigram import StatigramIE
from .steam import SteamIE from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
@@ -147,8 +159,13 @@ from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
)
from .vine import VineIE from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE from .vk import VKIE
from .wat import WatIE from .wat import WatIE
from .websurg import WeBSurgIE from .websurg import WeBSurgIE
@@ -159,7 +176,11 @@ from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeIE from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE
@@ -176,6 +197,7 @@ from .youtube import (
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
YoutubeFavouritesIE, YoutubeFavouritesIE,
YoutubeHistoryIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

View File

@@ -0,0 +1,53 @@
import re
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
    """Information extractor for videos hosted on anitube.se."""

    IE_NAME = u'anitube.se'
    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.anitube.se/video/36621',
        u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
        u'file': u'36621.mp4',
        u'info_dict': {
            u'id': u'36621',
            u'ext': u'mp4',
            u'title': u'Recorder to Randoseru 01',
        },
        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for an anitube.se URL.

        The video page embeds a player key; that key is used to fetch an
        XML configuration document that carries the title and the media
        URLs (``file`` for SD, ``filehd`` for HD).
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)
        embed_key = self._html_search_regex(
            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
            page, u'key')

        config = self._download_xml(
            'http://www.anitube.se/nuevo/econfig.php?key=%s' % embed_key,
            embed_key)
        title = config.find('title').text

        # Keep the original ordering: SD first, then HD.
        formats = []
        for tag, format_id in (('file', 'sd'), ('filehd', 'hd')):
            node = config.find(tag)
            if node is None:
                continue
            formats.append({
                'format_id': format_id,
                'url': node.text,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats
        }

View File

@@ -1,7 +1,6 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -69,7 +68,7 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang') lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang) return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None: if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry') raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url) # self.extractLiveStream(url)
# return # return
@@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
"""Extract from videos.arte.tv""" """Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata') ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref'] config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -109,13 +107,12 @@ class ArteTvIE(InfoExtractor):
"""Extract form http://liveweb.arte.tv/""" """Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information') video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event') event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd') url_node = event_doc.find('video').find('urlHd')
if url_node is None: if url_node is None:
url_node = video_doc.find('urlSd') url_node = event_doc.find('urlSd')
return {'id': video_id, return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text, 'title': event_doc.find('name%s' % lang.capitalize()).text,

View File

@@ -1,10 +1,10 @@
import os.path
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse, determine_ext,
ExtractorError,
) )
class AUEngineIE(InfoExtractor): class AUEngineIE(InfoExtractor):
@@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, u'title') webpage, u'title')
title = title.strip() title = title.strip()
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage) links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = [compat_urllib_parse.unquote(l) for l in links] links = map(compat_urllib_parse.unquote, links)
thumbnail = None
video_url = None
for link in links: for link in links:
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path) if link.endswith('.png'):
if pathext == '.png':
thumbnail = link thumbnail = link
elif pathext == '.mp4': elif '/videos/' in link:
url = link video_url = link
ext = pathext if not video_url:
raise ExtractorError(u'Could not find video URL')
ext = u'.' + determine_ext(video_url)
if ext == title[-len(ext):]: if ext == title[-len(ext):]:
title = title[:-len(ext)] title = title[:-len(ext)]
ext = ext[1:]
return [{ return {
'id': video_id, 'id': video_id,
'url': url, 'url': video_url,
'ext': ext,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
}] }

View File

@@ -15,7 +15,8 @@ class BambuserIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://bambuser.com/v/4050584', u'url': u'http://bambuser.com/v/4050584',
u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
u'info_dict': { u'info_dict': {
u'id': u'4050584', u'id': u'4050584',
u'ext': u'flv', u'ext': u'flv',
@@ -24,6 +25,11 @@ class BambuserIE(InfoExtractor):
u'uploader': u'pixelversity', u'uploader': u'pixelversity',
u'uploader_id': u'344706', u'uploader_id': u'344706',
}, },
u'params': {
# It doesn't respect the 'Range' header, it would download the whole video
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
u'skip_download': True,
},
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -3,13 +3,16 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
compat_urlparse,
ExtractorError, ExtractorError,
) )
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
IE_NAME = u'Bandcamp'
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TEST = { _TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3', u'file': u'1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
@@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
}, },
u'skip': u'There is a limit of 200 free downloads / month for the test song' u'skip': u'There is a limit of 200 free downloads / month for the test song'
} }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
# We get the link to the free download page # We get the link to the free download page
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None: if m_download is None:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
for d in data:
formats = [{
'format_id': 'format_id',
'url': format_url,
'ext': format_id.partition('-')[0]
} for format_id, format_url in sorted(d['file'].items())]
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
}
else:
raise ExtractorError(u'No free songs found') raise ExtractorError(u'No free songs found')
download_link = m_download.group(1) download_link = m_download.group(1)
@@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
} }
return [track_info] return [track_info]
class BandcampAlbumIE(InfoExtractor):
    """Information extractor for Bandcamp album pages.

    An album is returned as a playlist whose entries are URL results that
    are delegated to BandcampIE, one per track link found on the page.
    """

    IE_NAME = u'Bandcamp:album'
    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'

    _TEST = {
        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        u'playlist': [
            {
                u'file': u'1353101989.mp3',
                u'md5': u'39bc1eded3476e927c724321ddf116cf',
                u'info_dict': {
                    u'title': u'Intro',
                }
            },
            {
                u'file': u'38097443.mp3',
                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
                u'info_dict': {
                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
                }
            },
        ],
        u'params': {
            u'playlistend': 2
        },
        u'skip': u'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
    }

    def _real_extract(self, url):
        """Return a playlist dict with one URL entry per album track.

        Raises ExtractorError when the album page has no track links.
        """
        mobj = re.match(self._VALID_URL, url)
        # The URL slug is only used as the display id while downloading;
        # the real album title is read from the page further down.
        album_slug = mobj.group('title')
        webpage = self._download_webpage(url, album_slug)

        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
        if not tracks_paths:
            raise ExtractorError(u'The page doesn\'t contain any track')

        # Track links are resolved against the album URL before delegating.
        entries = [
            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
            for t_path in tracks_paths]

        title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
        return {
            '_type': 'playlist',
            'title': title,
            'entries': entries,
        }

View File

@@ -75,16 +75,22 @@ class BrightcoveIE(InfoExtractor):
params = {'flashID': object_doc.attrib['id'], params = {'flashID': object_doc.attrib['id'],
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'], 'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
} }
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey') def find_param(name):
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
playerKey = find_param('playerKey')
# Not all pages define this value # Not all pages define this value
if playerKey is not None: if playerKey is not None:
params['playerKey'] = playerKey.attrib['value'] params['playerKey'] = playerKey
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') # The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None: if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value'] params['@videoPlayer'] = videoPlayer
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL') linkBase = find_param('linkBaseURL')
if linkBase is not None: if linkBase is not None:
params['linkBaseURL'] = linkBase.attrib['value'] params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params) data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data return cls._FEDERATED_URL_TEMPLATE % data

View File

@@ -1,10 +1,10 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
class CanalplusIE(InfoExtractor): class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
@@ -25,16 +25,15 @@ class CanalplusIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.groupdict().get('id')
if video_id is None: if video_id is None:
webpage = self._download_webpage(url, mobj.group('path')) webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id, doc = self._download_xml(info_url,video_id,
u'Downloading video info') u'Downloading video info')
self.report_extraction(video_id) self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0] video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS') infos = video_info.find('INFOS')
media = video_info.find('MEDIA') media = video_info.find('MEDIA')

View File

@@ -0,0 +1,58 @@
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
class ClipfishIE(InfoExtractor):
    """Information extractor for clipfish.de videos."""

    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        u'file': u'3966754.mp4',
        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
            u'title': u'FIFA 14 - E3 2013 Trailer',
            u'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }

    @staticmethod
    def _xml_text(doc, tag):
        """Return the text of child ``tag`` of ``doc``, or None if absent."""
        node = doc.find(tag)
        return None if node is None else node.text

    def _real_extract(self, url):
        """Return the info dict for a clipfish.de video URL.

        Raises ExtractorError when the info document carries no video URL,
        whether the ``filename`` element is missing or simply empty.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # The current timestamp is appended as a ``ts`` query parameter.
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
        title = doc.find('title').text

        # Element.find() returns None for a missing child, so reading
        # ``.text`` directly would raise AttributeError before the check
        # below could report the problem properly.
        video_url = self._xml_text(doc, 'filename')
        if video_url is None:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError(u'Cannot find video URL in document %r' %
                                 xml_bytes)

        # Thumbnail and duration are optional metadata: tolerate absence.
        thumbnail = self._xml_text(doc, 'imageurl')
        duration_str = self._xml_text(doc, 'duration')
        m = None
        if duration_str:
            m = re.match(
                r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
                duration_str)
        if m:
            # The trailing ``ms`` field is matched but not used.
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }

View File

@@ -0,0 +1,52 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
)
class ClipsyndicateIE(InfoExtractor):
    """Information extractor for clipsyndicate.com videos."""

    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
        u'info_dict': {
            u'id': u'4629301',
            u'ext': u'mp4',
            u'title': u'Brick Briscoe',
            u'duration': 612,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for a clipsyndicate.com video URL.

        The embed player script exposes a ``flvars`` query string that is
        passed on to the playlist endpoint; the playlist XML then provides
        the media location and the metadata params.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, u'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

        playlist_page = self._download_webpage(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info')
        # Fix broken xml: every '&' is escaped before parsing.
        playlist_page = re.sub('&', '&amp;', playlist_page)
        pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the ``value`` of the named <param>, or None if absent."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        # Duration is optional metadata; int(None) would raise TypeError.
        duration_str = find_param('duration')
        duration = int(duration_str) if duration_str is not None else None

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': duration,
        }

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import determine_ext
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
path = mobj.group('path') path = mobj.group('path')
page_title = mobj.group('title') page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title) info = self._download_xml(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
formats = [] formats = []
for f in info.findall('files/file'): for f in info.findall('files/file'):

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
metaXml = self._download_webpage(xmlUrl, video_id, mdoc = self._download_xml(xmlUrl, video_id,
u'Downloading info XML', u'Downloading info XML',
u'Unable to download video info XML') u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try: try:
videoNode = mdoc.findall('./video')[0] videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID') youtubeIdNode = videoNode.find('./youtubeID')
@@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor):
if next_url.endswith(u'manifest.f4m'): if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3' manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id, adoc = self._download_xml(manifest_url, video_id,
u'Downloading XML manifest', u'Downloading XML manifest',
u'Unable to download video info XML') u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try: try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err: except IndexError:
raise ExtractorError(u'Invalid manifest file') raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail']) url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')

View File

@@ -1,7 +1,7 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from .mtv import MTVIE, _media_xml_tag
from ..utils import ( from ..utils import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
@@ -11,7 +11,37 @@ from ..utils import (
) )
class ComedyCentralIE(InfoExtractor): class ComedyCentralIE(MTVIE):
_VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
u'md5': u'4167875aae411f903b751a21f357f1ee',
u'info_dict': {
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
u'ext': u'mp4',
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
},
}
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc):
    """Return the ``url`` attribute of the group/thumbnail node in itemdoc."""
    group_tag = _media_xml_tag('group')
    thumbnail_tag = _media_xml_tag('thumbnail')
    thumbnail_node = itemdoc.find('%s/%s' % (group_tag, thumbnail_tag))
    return thumbnail_node.attrib['url']
def _real_extract(self, url):
    """Find the page's ``data-mgid`` value and return its videos info."""
    display_title = re.match(self._VALID_URL, url).group('title')
    page = self._download_webpage(url, display_title)
    mgid = self._search_regex(
        r'data-mgid="(?P<mgid>mgid:.*?)"', page, u'mgid')
    return self._get_videos_info(mgid)
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = u'The Daily Show / Colbert Report' IE_DESC = u'The Daily Show / Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert # urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like: # urls for episodes like:
@@ -127,13 +157,12 @@ class ComedyCentralIE(InfoExtractor):
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
indexXml = self._download_webpage(indexUrl, epTitle, idoc = self._download_xml(indexUrl, epTitle,
u'Downloading show index', u'Downloading show index',
u'unable to download episode index') u'unable to download episode index')
results = [] results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item') itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls): for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text mediaId = itemEl.findall('./guid')[0].text
@@ -144,10 +173,9 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId})) compat_urllib_parse.urlencode({'uri': mediaId}))
configXml = self._download_webpage(configUrl, epTitle, cdoc = self._download_xml(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId) u'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = [] turls = []
for rendition in cdoc.findall('.//rendition'): for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)

View File

@@ -4,11 +4,11 @@ import re
import socket import socket
import sys import sys
import netrc import netrc
import xml.etree.ElementTree
from ..utils import ( from ..utils import (
compat_http_client, compat_http_client,
compat_urllib_error, compat_urllib_error,
compat_urllib_request,
compat_str, compat_str,
clean_html, clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML, unescapeHTML,
) )
class InfoExtractor(object): class InfoExtractor(object):
"""Information Extractor class. """Information Extractor class.
@@ -71,6 +72,11 @@ class InfoExtractor(object):
("3D" or "DASH video") ("3D" or "DASH video")
* width Width of the video, if known * width Width of the video, if known
* height Height of the video, if known * height Height of the video, if known
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set should allow to get the same result again. (It will be set
by YoutubeDL if it's missing) by YoutubeDL if it's missing)
@@ -152,7 +158,7 @@ class InfoExtractor(object):
elif note is not False: elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note)) self.to_screen(u'%s: %s' % (video_id, note))
try: try:
return compat_urllib_request.urlopen(url_or_request) return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None: if errnote is None:
errnote = u'Unable to download webpage' errnote = u'Unable to download webpage'
@@ -204,6 +210,12 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """ """ Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
def _download_xml(self, url_or_request, video_id,
                  note=u'Downloading XML', errnote=u'Unable to download XML'):
    """Download a page and return it parsed as an xml.etree.ElementTree.Element"""
    page = self._download_webpage(url_or_request, video_id, note, errnote)
    # The parser is given the UTF-8 encoded bytes of the page text.
    encoded_page = page.encode('utf-8')
    return xml.etree.ElementTree.fromstring(encoded_page)
def to_screen(self, msg): def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'""" """Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -225,12 +237,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in') self.to_screen(u'Logging in')
#Methods for following #608 #Methods for following #608
def url_result(self, url, ie=None): def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed""" """Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info #TODO: ie should be the class used for getting the info
video_info = {'_type': 'url', video_info = {'_type': 'url',
'url': url, 'url': url,
'ie_key': ie} 'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None): def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist""" """Returns a playlist"""
@@ -315,13 +329,19 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info # Helper functions for extracting OpenGraph info
@staticmethod @staticmethod
def _og_regex(prop): def _og_regexes(prop):
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop) content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
template % (property_re, content_re),
template % (content_re, property_re),
]
def _og_search_property(self, prop, html, name=None, **kargs): def _og_search_property(self, prop, html, name=None, **kargs):
if name is None: if name is None:
name = 'OpenGraph %s' % prop name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None: if escaped is None:
return None return None
return unescapeHTML(escaped) return unescapeHTML(escaped)
@@ -336,10 +356,21 @@ class InfoExtractor(object):
return self._og_search_property('title', html, **kargs) return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs): def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
regexes = [self._og_regex('video')] regexes = self._og_regexes('video')
if secure: regexes.insert(0, self._og_regex('video:secure_url')) if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs) return self._html_search_regex(regexes, html, name, **kargs)
def _html_search_meta(self, name, html, display_name=None):
    """Return the ``content`` of the <meta> tag whose name/property is ``name``.

    The search is non-fatal (``fatal=False`` is passed to the regex helper).
    ``display_name`` defaults to ``name``.
    """
    label = name if display_name is None else display_name
    # The lookahead finds the name/property attribute anywhere in the tag
    # before the content attribute is captured.
    meta_re = r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name)
    return self._html_search_regex(meta_re, html, label, fatal=False)
def _dc_search_uploader(self, html):
    """Return the ``dc.creator`` meta value of html, labelled 'uploader'."""
    uploader = self._html_search_meta('dc.creator', html, 'uploader')
    return uploader
def _rta_search(self, html): def _rta_search(self, html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single # See http://www.rtalabel.org/index.php?content=howtofaq#single
if re.search(r'(?ix)<meta\s+name="rating"\s+' if re.search(r'(?ix)<meta\s+name="rating"\s+'
@@ -348,6 +379,23 @@ class InfoExtractor(object):
return 18 return 18
return 0 return 0
def _media_rating_search(self, html):
    """Map the page's ``rating`` meta value to a number, or None.

    None is returned when the meta tag is missing/empty or when its value
    is not one of the known rating labels (compared case-insensitively).
    """
    # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
    rating = self._html_search_meta('rating', html)

    if rating:
        RATING_TABLE = {
            'safe for kids': 0,
            'general': 8,
            '14 years': 14,
            'mature': 17,
            'restricted': 19,
        }
        return RATING_TABLE.get(rating.lower(), None)
    return None
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@@ -0,0 +1,22 @@
# encoding: utf-8
from .canalplus import CanalplusIE
class D8IE(CanalplusIE):
    # Extractor for d8.tv.  Only class-level settings are overridden here;
    # everything else comes from CanalplusIE.
    IE_NAME = u'd8.tv'

    # Everything after the first path component is captured as "path".
    _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'

    _TEST = {
        u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
        u'file': u'966289.flv',
        u'info_dict': {
            u'title': u'Campagne intime - Documentaire exceptionnel',
            u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
            u'upload_date': u'20131108',
        },
        u'params': {
            # rtmp
            u'skip_download': True,
        },
    }

View File

@@ -186,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request, webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:

View File

@@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
webpage, u'full id') webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id}) query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage( info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info') u'Downloading video info')
urls_xml = self._download_webpage( urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info') video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id) self.to_screen(u'%s: Getting video urls' % video_id)
formats = [] formats = []
@@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
'vid': full_id, 'vid': full_id,
'profile': profile, 'profile': profile,
}) })
url_xml = self._download_webpage( url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False) video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text format_url = url_doc.find('result/url').text
formats.append({ formats.append({
'url': format_url, 'url': format_url,

View File

@@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details') details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
thumbnail_els = details_doc.findall('.//teaserimage') thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{ thumbnails = [{

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import determine_ext
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config_xml = self._download_webpage( config = self._download_xml(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text video_url = config.find('file').text
return { return {

View File

@@ -1,4 +1,3 @@
import itertools
import json import json
import random import random
import re import re

View File

@@ -11,11 +11,11 @@ from ..utils import (
class EscapistIE(InfoExtractor): class EscapistIE(InfoExtractor):
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
_TEST = { _TEST = {
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4', u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
u'md5': u'c6793dbda81388f4264c1ba18684a74d', u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
u'info_dict': { u'info_dict': {
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
u"uploader": u"the-escapist-presents", u"uploader": u"the-escapist-presents",
@@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
showName = mobj.group('showname') showName = mobj.group('showname')
videoId = mobj.group('episode') videoId = mobj.group('episode')
self.report_extraction(videoId) self.report_extraction(videoId)
webpage = self._download_webpage(url, videoId) webpage = self._download_webpage(url, videoId)
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"', videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, u'description', fatal=False) webpage, u'description', fatal=False)
playerUrl = self._og_search_video_url(webpage, name='player url') playerUrl = self._og_search_video_url(webpage, name=u'player URL')
title = self._html_search_regex('<meta name="title" content="([^"]*)"', title = self._html_search_regex(
webpage, u'player url').split(' : ')[-1] r'<meta name="title" content="([^"]*)"',
webpage, u'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url') configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
configUrl = compat_urllib_parse.unquote(configUrl) configUrl = compat_urllib_parse.unquote(configUrl)
configJSON = self._download_webpage(configUrl, videoId, formats = []
u'Downloading configuration',
u'unable to download configuration')
# Technically, it's JavaScript, not JSON def _add_format(name, cfgurl):
configJSON = configJSON.replace("'", '"') configJSON = self._download_webpage(
cfgurl, videoId,
u'Downloading ' + name + ' configuration',
u'Unable to download ' + name + ' configuration')
# Technically, it's JavaScript, not JSON
configJSON = configJSON.replace("'", '"')
try:
config = json.loads(configJSON)
except (ValueError,) as err:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
playlist = config['playlist']
formats.append({
'url': playlist[1]['url'],
'format_id': name,
})
_add_format(u'normal', configUrl)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
try: try:
config = json.loads(configJSON) _add_format(u'hq', hq_url)
except (ValueError,) as err: except ExtractorError:
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err)) pass # That's fine, we'll just use normal quality
playlist = config['playlist'] return {
videoUrl = playlist[1]['url']
info = {
'id': videoId, 'id': videoId,
'url': videoUrl, 'formats': formats,
'uploader': showName, 'uploader': showName,
'upload_date': None,
'title': title, 'title': title,
'ext': 'mp4',
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc, 'description': videoDesc,
'player_url': playerUrl, 'player_url': playerUrl,
} }
return [info]

View File

@@ -1,5 +1,4 @@
import json import json
import netrc
import re import re
import socket import socket

View File

@@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
u'config xml url') u'config xml url')
config_xml = self._download_webpage(config_xml_url, video_id, config = self._download_xml(config_xml_url, video_id,
u'Downloading config xml') u'Downloading config xml')
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
encodings = config.find('ENCODINGS') encodings = config.find('ENCODINGS')
formats = [] formats = []

View File

@@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
for i, _ in enumerate(files, 1): for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i) video_id = '%04d%d' % (episode, i)
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
video_title = 'Fernsehkritik %d.%d' % (episode, i)
videos.append({ videos.append({
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,

View File

@@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@@ -11,11 +10,10 @@ from ..utils import (
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id): def _extract_video(self, video_id):
xml_desc = self._download_webpage( info = self._download_xml(
'http://www.francetvinfo.fr/appftv/webservices/video/' 'http://www.francetvinfo.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion=' 'getInfosOeuvre.php?id-diffusion='
+ video_id, video_id, 'Downloading XML config') + video_id, video_id, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text manifest_url = info.find('videos/video/url').text
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')

View File

@@ -1,9 +1,6 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class GamekingsIE(InfoExtractor): class GamekingsIE(InfoExtractor):
@@ -11,7 +8,8 @@ class GamekingsIE(InfoExtractor):
_TEST = { _TEST = {
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
u'file': u'20130811.mp4', u'file': u'20130811.mp4',
u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', # MD5 is flaky, seems to change regularly
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
u'info_dict': { u'info_dict': {
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",

View File

@@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
page_id = video_id = mobj.group('page_id') page_id = mobj.group('page_id')
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video') data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
data_video = json.loads(unescapeHTML(data_video_json)) data_video = json.loads(unescapeHTML(data_video_json))

View File

@@ -162,6 +162,16 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Failed to download URL: %s' % url) raise ExtractorError(u'Failed to download URL: %s' % url)
self.report_extraction(video_id) self.report_extraction(video_id)
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title', default=u'video', flags=re.DOTALL)
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is not None: if bc_url is not None:
@@ -177,17 +187,29 @@ class GenericIE(InfoExtractor):
return self.url_result(surl, 'Vimeo') return self.url_result(surl, 'Vimeo')
# Look for embedded YouTube player # Look for embedded YouTube player
mobj = re.search( matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
if mobj: if matches:
surl = unescapeHTML(mobj.group(u'url')) urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
return self.url_result(surl, 'Youtube') for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for Bandcamp pages with custom domain # Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None: if mobj is not None:
burl = unescapeHTML(mobj.group(1)) burl = unescapeHTML(mobj.group(1))
return self.url_result(burl, 'Bandcamp') # Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
# Start with something easy: JW Player in SWFObject # Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
@@ -196,7 +218,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None: if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader # Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage) mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
if mobj is None: if mobj is None:
# Try to find twitter cards info # Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -223,27 +245,16 @@ class GenericIE(InfoExtractor):
video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
# here's a fun little line of code for you: # here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]
video_id = os.path.splitext(video_id)[0] video_id = os.path.splitext(video_id)[0]
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
video_title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'video title', default=u'video', flags=re.DOTALL)
# video uploader is domain name # video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
url, u'video uploader') url, u'video uploader')
return [{ return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': None, 'upload_date': None,
'title': video_title, 'title': video_title,
'ext': video_extension, }
}]

View File

@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
u'file': u'390161.mp4', u'file': u'390161.mp4',
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138', u'md5': u'8b743df908c42f60cf6496586c7f12c3',
u'info_dict': { u'info_dict': {
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.", u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
u"title": u"How to Tie a Square Knot Properly" u"title": u"How to Tie a Square Knot Properly"

View File

@@ -0,0 +1,59 @@
import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    ExtractorError,
    get_element_by_attribute,
)
class ImdbIE(InfoExtractor):
    IE_NAME = u'imdb'
    IE_DESC = u'Internet Movie Database trailers'
    _VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
        u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
        u'info_dict': {
            u'id': u'2524815897',
            u'ext': u'mp4',
            u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
            u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
            u'duration': 151,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for one IMDb video page.

        Downloads the video page, then one extra page per advertised
        format, and reads each format's URL and height from the JSON
        embedded in that page.

        Raises ExtractorError when the video page lists no formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        descr = get_element_by_attribute('itemprop', 'description', webpage)

        # Each format shows up as a "case '<id>' :" line followed by a
        # "url = '<path>'" assignment in the page source.
        available_formats = re.findall(
            r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
            flags=re.MULTILINE)
        if not available_formats:
            # Without this guard the code below would fail on the unbound
            # names `info` / `format_info` instead of reporting the problem.
            raise ExtractorError(u'Unable to find any video formats')

        formats = []
        for f_id, f_path in available_formats:
            # video_id goes second; the progress note is the third argument.
            format_page = self._download_webpage(
                compat_urlparse.urljoin(url, f_path), video_id,
                u'Downloading info for %s format' % f_id)
            json_data = self._search_regex(
                r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                format_page, u'json data', flags=re.DOTALL)
            info = json.loads(json_data)
            format_info = info['videoPlayerObject']['video']
            formats.append({
                'format_id': f_id,
                'url': format_info['url'],
                # 'selected' carries a one-character suffix after the number
                'height': int(info['titleObject']['encoding']['selected'][:-1]),
            })

        # Thumbnail and duration are taken from the last format page fetched.
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': format_info['slate'],
            'duration': int(info['titleObject']['title']['duration_seconds']),
        }

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
video_id = query_dic['publishedid'][0] video_id = query_dic['publishedid'][0]
url = self._build_url(query) url = self._build_url(query)
flashconfiguration_xml = self._download_webpage(url, video_id, flashconfiguration = self._download_xml(url, video_id,
u'Downloading flash configuration') u'Downloading flash configuration')
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
file_url = flashconfiguration.find('file').text file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality # Replace some of the parameters in the query to get the best quality
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
file_url = re.sub(r'(?<=\?)(.+)$', file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()), lambda m: self._clean_query(m.group()),
file_url) file_url)
info_xml = self._download_webpage(file_url, video_id, info = self._download_xml(file_url, video_id,
u'Downloading video info') u'Downloading video info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
item = info.find('channel/item') item = info.find('channel/item')
def _bp(p): def _bp(p):

View File

@@ -2,7 +2,6 @@
import json import json
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@@ -22,7 +21,7 @@ class JeuxVideoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1) title = mobj.group(1)
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
xml_link = self._html_search_regex( xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />', r'<param name="flashvars" value="config=(.*?)" />',
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID') xml_link, u'video ID')
xml_config = self._download_webpage( config = self._download_xml(
xml_link, title, u'Downloading XML config') xml_link, title, u'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) info_json = config.find('format.json').text
info_json = self._search_regex(
r'(?sm)<format\.json>(.*?)</format\.json>',
xml_config, u'JSON information')
info = json.loads(info_json)['versions'][0] info = json.loads(info_json)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file'] video_url = 'http://video720.jeuxvideo.com/' + info['file']

View File

@@ -1,7 +1,6 @@
import json import json
import os import os
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
archive_id = m.group(1) archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
chapter_info_xml = self._download_webpage(api, chapter_id, doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information', note=u'Downloading chapter information',
errnote=u'Chapter information download failed') errnote=u'Chapter information download failed')
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
for a in doc.findall('.//archive'): for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text: if archive_id == a.find('./id').text:
break break

View File

@@ -1,14 +1,11 @@
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urlparse, compat_urlparse,
get_meta_content,
xpath_with_ns, xpath_with_ns,
ExtractorError,
) )
@@ -82,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
user = mobj.group('user') user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
api_response = self._download_webpage(api_url, video_id) info = self._download_xml(api_url, video_id)
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
item = info.find('channel').find('item') item = info.find('channel').find('item')
ns = {'media': 'http://search.yahoo.com/mrss'} ns = {'media': 'http://search.yahoo.com/mrss'}
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']

View File

@@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
'title': info['name'], 'title': info['name'],
'url': final_song_url, 'url': final_song_url,
'ext': 'mp3', 'ext': 'mp3',
'description': info['description'], 'description': info.get('description'),
'thumbnail': info['pictures'].get('extra_large'), 'thumbnail': info['pictures'].get('extra_large'),
'uploader': info['user']['name'], 'uploader': info['user']['name'],
'uploader_id': info['user']['username'], 'uploader_id': info['user']['username'],

View File

@@ -48,7 +48,7 @@ class MTVIE(InfoExtractor):
def _transform_rtmp_url(rtmp_video_url): def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
if not m: if not m:
raise ExtractorError(u'Cannot transform RTMP url') return rtmp_video_url
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid') return base + m.group('finalid')
@@ -59,7 +59,6 @@ class MTVIE(InfoExtractor):
if '/error_country_block.swf' in metadataXml: if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True) raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8')) mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
renditions = mdoc.findall('.//rendition')
formats = [] formats = []
for rendition in mdoc.findall('.//rendition'): for rendition in mdoc.findall('.//rendition'):
@@ -110,9 +109,8 @@ class MTVIE(InfoExtractor):
def _get_videos_info(self, uri): def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri}) data = compat_urllib_parse.urlencode({'uri': uri})
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id, idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
u'Downloading info') u'Downloading info')
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
return [self._get_video_info(item) for item in idoc.findall('.//item')] return [self._get_video_info(item) for item in idoc.findall('.//item')]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -1,5 +1,4 @@
import os.path import os.path
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
# get metadata # get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata_text = self._download_webpage(metadata_url, video_id) metadata = self._download_xml(metadata_url, video_id)
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
# extract values from metadata # extract values from metadata
url_flv_el = metadata.find('url_flv') url_flv_el = metadata.find('url_flv')

View File

@@ -1,6 +1,5 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
'protocol': 'p2p', 'protocol': 'p2p',
'inKey': key, 'inKey': key,
}) })
info_xml = self._download_webpage( info = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info') video_id, u'Downloading video info')
urls_xml = self._download_webpage( urls = self._download_xml(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info') video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = [] formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'): for format_el in urls.findall('EncodingOptions/EncodingOption'):

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str from ..utils import find_xpath_attr, compat_str
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video') info = all_info.find('video')
return {'id': video_id, return {'id': video_id,
'title': info.find('headline').text, 'title': info.find('headline').text,

View File

@@ -1,6 +1,5 @@
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
'path': initial_video_url.replace('.mp4', '_sd.mp4'), 'path': initial_video_url.replace('.mp4', '_sd.mp4'),
}) })
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_response = self._download_webpage(path_url, video_id, path_doc = self._download_xml(path_url, video_id,
u'Downloading final video url') u'Downloading final video url')
path_doc = xml.etree.ElementTree.fromstring(path_response)
video_url = path_doc.find('path').text video_url = path_doc.find('path').text
join = compat_urlparse.urljoin join = compat_urlparse.urljoin
@@ -72,7 +70,7 @@ class NHLIE(NHLBaseInfoExtractor):
class NHLVideocenterIE(NHLBaseInfoExtractor): class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com:videocenter' IE_NAME = u'nhl.com:videocenter'
IE_DESC = u'Download the first 12 videos from a videocenter category' IE_DESC = u'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?' _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
@classmethod @classmethod

View File

@@ -0,0 +1,127 @@
# encoding: utf-8
import re
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
ExtractorError,
unified_strdate,
)
class NiconicoIE(InfoExtractor):
    """Information extractor for nicovideo.jp watch pages.

    A login is attempted during initialization.  Metadata is read from the
    getthumbinfo XML API, the media URL from the getflv API, and the
    uploader nickname (best effort) from the seiga user-info API.
    """
    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the login form with the configured credentials.

        Returns True when the login page reports no error.  Returns False
        when no credentials are available (and login is optional) or when
        the site answers with its "Log in error" page.

        Raises ExtractorError when no credentials are available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        # Log in
        login_form_strs = {
            u'mail': username,
            u'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, u'', note=u'Logging in', errnote=u'Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Return the info dictionary for the video addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # Get flv info; the response is a query string whose 'url' field
        # holds the real media location
        flv_info_webpage = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
        video_thumbnail = video_info.find('.//thumbnail_url').text
        video_description = video_info.find('.//description').text
        video_uploader_id = video_info.find('.//user_id').text
        # Only the part before the '+' (the timezone offset) is parsed
        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
        video_webpage_url = video_info.find('.//watch_url').text

        # Report the view count as a number rather than as raw XML text;
        # an empty or non-numeric counter is reported as unknown.
        view_counter_text = video_info.find('.//view_counter').text
        try:
            video_view_count = int(view_counter_text)
        except (TypeError, ValueError):
            video_view_count = None

        # uploader: fall back to the numeric id unless a nickname is found
        video_uploader = video_uploader_id
        user_info_url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info = self._download_xml(
                user_info_url, video_id, note=u'Downloading user information')
        # NOTE(review): _download_xml presumably reports network failures as
        # ExtractorError - confirm against InfoExtractor.  It is caught here
        # so that this optional lookup can never abort the extraction.
        except (ExtractorError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
        else:
            nickname = user_info.find('.//nickname')
            if nickname is not None and nickname.text:
                video_uploader = nickname.text

        return {
            'id': video_id,
            'url': video_real_url,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }

View File

@@ -0,0 +1,49 @@
import json
import re
from .common import InfoExtractor
class PodomaticIE(InfoExtractor):
    """Information extractor for <channel>.podomatic.com/entry/<id> pages."""
    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

    _TEST = {
        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
        u"file": u"2009-01-02T16_03_35-08_00.mp3",
        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
        u"info_dict": {
            u"uploader": u"Science Teaching Tips",
            u"uploader_id": u"scienceteachingtips",
            u"title": u"64. When the Moon Hits Your Eye",
            u"duration": 446,
        }
    }

    def _real_extract(self, url):
        """Fetch the embed parameters (JSON) of the entry and build its info dict."""
        match = re.match(self._VALID_URL, url)
        entry_id = match.group('id')
        channel_name = match.group('channel')

        # The embed_params endpoint lives on the same scheme and channel
        # host as the entry page itself.
        params_url = (
            ('%s://%s.podomatic.com/entry/embed_params/%s' +
             '?permalink=true&rtmp=0')
            % (match.group('proto'), channel_name, entry_id))
        params = json.loads(self._download_webpage(
            params_url, entry_id, note=u'Downloading video info'))

        media_url, podcast_name, entry_title, image_url = (
            params[key]
            for key in ('downloadLink', 'podcast', 'title', 'imageLocation'))
        # 'length' is divided by 1000 and truncated to an integer
        seconds = int(params['length'] / 1000.0)

        return {
            'id': entry_id,
            'url': media_url,
            'title': entry_title,
            'uploader': podcast_name,
            'uploader_id': channel_name,
            'thumbnail': image_url,
            'duration': seconds,
        }

View File

@@ -6,7 +6,6 @@ from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
unescapeHTML,
) )
from ..aes import ( from ..aes import (
aes_decrypt_text aes_decrypt_text

View File

@@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://www.redtube.com/66418', u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4', u'file': u'66418.mp4',
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d', # md5 varies from time to time, as in
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
u'info_dict': { u'info_dict': {
u"title": u"Sucked on a toilet", u"title": u"Sucked on a toilet",
u"age_limit": 18, u"age_limit": 18,

View File

@@ -62,18 +62,6 @@ class RTLnowIE(InfoExtractor):
u'skip_download': True, u'skip_download': True,
}, },
}, },
{
u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
u'file': u'129679.flv',
u'info_dict': {
u'upload_date': u'20131016',
u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
},
u'params': {
u'skip_download': True,
},
},
{ {
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
u'file': u'124903.flv', u'file': u'124903.flv',

View File

@@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id): def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id}) data = compat_urllib_parse.urlencode({'vid': video_id})
url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data, url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, u'Downloading video url') video_id, u'Downloading video url')
image_page = self._download_webpage( image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, u'Downloading thumbnail info') video_id, u'Downloading thumbnail info')
url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
return {'id': video_id, return {'id': video_id,
'url': url_doc.find('./durl/url').text, 'url': url_doc.find('./durl/url').text,

View File

@@ -59,6 +59,7 @@ class SoundcloudIE(InfoExtractor):
] ]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@@ -75,36 +76,79 @@ class SoundcloudIE(InfoExtractor):
def _extract_info_dict(self, info, full_title=None, quiet=False): def _extract_info_dict(self, info, full_title=None, quiet=False):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
name = full_title or track_id name = full_title or track_id
if quiet == False: if quiet:
self.report_extraction(name) self.report_extraction(name)
thumbnail = info['artwork_url'] thumbnail = info['artwork_url']
if thumbnail is not None: if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500') thumbnail = thumbnail.replace('-large', '-t500x500')
ext = info.get('original_format', u'mp3')
result = { result = {
'id': track_id, 'id': track_id,
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'uploader': info['user']['username'], 'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']), 'upload_date': unified_strdate(info['created_at']),
'title': info['title'], 'title': info['title'],
'ext': info.get('original_format', u'mp3'),
'description': info['description'], 'description': info['description'],
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
if info.get('downloadable', False): if info.get('downloadable', False):
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID) # We can build a direct link to the song
if not info.get('streamable', False): format_url = (
# We have to get the rtmp url u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
'format_id': 'download',
'ext': ext,
'url': format_url,
'vcodec': 'none',
}]
else:
# We have to retrieve the url
stream_json = self._download_webpage( stream_json = self._download_webpage(
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID), 'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
track_id, u'Downloading track url') track_id, u'Downloading track url')
rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
# The url doesn't have an rtmp app, we have to extract the playpath formats = []
url, path = rtmp_url.split('mp3:', 1) format_dict = json.loads(stream_json)
result.update({ for key, stream_url in format_dict.items():
'url': url, if key.startswith(u'http'):
'play_path': 'mp3:' + path, formats.append({
}) 'format_id': key,
'ext': ext,
'url': stream_url,
'vcodec': 'none',
})
elif key.startswith(u'rtmp'):
# The url doesn't have an rtmp app, we have to extract the playpath
url, path = stream_url.split('mp3:', 1)
formats.append({
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
'ext': ext,
'vcodec': 'none',
})
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
formats.append({
'format_id': u'fallback',
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'ext': ext,
'vcodec': 'none',
})
def format_pref(f):
if f['format_id'].startswith('http'):
return 2
if f['format_id'].startswith('rtmp'):
return 1
return 0
formats.sort(key=format_pref)
result['formats'] = formats
return result return result
def _real_extract(self, url): def _real_extract(self, url):
@@ -158,7 +202,6 @@ class SoundcloudSetIE(SoundcloudIE):
resolv_url = self._resolv_url(url) resolv_url = self._resolv_url(url)
info_json = self._download_webpage(resolv_url, full_title) info_json = self._download_webpage(resolv_url, full_title)
videos = []
info = json.loads(info_json) info = json.loads(info_json)
if 'errors' in info: if 'errors' in info:
for err in info['errors']: for err in info['errors']:

View File

@@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
class SouthParkStudiosIE(MTVIE): class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com' IE_NAME = u'southparkstudios.com'
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)' _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TEST = { # Overwrite MTVIE properties we don't want
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': { u'info_dict': {
u'title': u'Bat Daded', u'title': u'Bat Daded',
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
}, },
} }]
# Overwrite MTVIE properties we don't want
_TESTS = []
def _get_thumbnail_url(self, uri, itemdoc): def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
@@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
url = u'http://www.' + mobj.group(u'url')
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
webpage, u'mgid') webpage, u'mgid')
return self._get_videos_info(mgid) return self._get_videos_info(mgid)
# Extractor for southpark.de: a SouthParkStudiosIE subclass that only
# overrides the extractor name, the URL pattern, the feed URL and the tests.
class SouthparkDeIE(SouthParkStudiosIE):
IE_NAME = u'southpark.de'
# Scheme and "www." are optional; the named group 'url' captures host and
# path, 'id' captures what follows "clips/" or "alle-episoden/" up to the
# first '?', '#' or the end of the string.
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
# Test case: expected output file name plus title and description.
_TESTS = [{
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
u'info_dict': {
u'title': u'The Government Won\'t Respect My Privacy',
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
},
}]

View File

@@ -6,7 +6,6 @@ from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urllib_parse, compat_urllib_parse,
unescapeHTML,
) )
from ..aes import ( from ..aes import (
aes_decrypt_text aes_decrypt_text
@@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor):
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title') video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False) video_uploader = self._html_search_regex(
thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False) r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False) thumbnail = self._html_search_regex(
if len(description) == 0: r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
description = None description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage))) video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1: if webpage.find('flashvars\.encrypted = "true"') != -1:

View File

@@ -1,19 +1,26 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TEST = { _TESTS = [{
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4', u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e', u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': { u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv" u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
} }
} },
{
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
u'file': u'1309159.mp4',
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
u'info_dict': {
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
}
}]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
@@ -21,25 +28,36 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>', video_title = self._html_search_regex(
webpage, u'title') r'<div class="module-title">(.*?)</div>', webpage, u'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(xml_url, video_id, idoc = self._download_xml(
note=u'Downloading XML', errnote=u'Failed to download XML') xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
idoc = xml.etree.ElementTree.fromstring(xml_code) formats = [
last_type = idoc[-1] {
filename = last_type.findall('./filename')[0].text 'format_id': n.tag.rpartition('type')[2],
duration = float(last_type.findall('./duration')[0].text) 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
'vbr': int(n.find('./videobitrate').text),
'vcodec': n.find('./codec').text,
'acodec': 'MP4A',
}
for n in list(idoc)
# Blacklist type 6, it's extremely LQ and not available on the same server
if n.tag.startswith('type') and n.tag != 'type6'
]
formats.sort(key=lambda f: f['vbr'])
duration = float(idoc[0].findall('./duration')[0].text)
video_url = 'http://video2.spiegel.de/flash/' + filename
video_ext = filename.rpartition('.')[2]
info = { info = {
'id': video_id, 'id': video_id,
'url': video_url,
'ext': video_ext,
'title': video_title, 'title': video_title,
'duration': duration, 'duration': duration,
'formats': formats,
} }
return [info] return info

View File

@@ -0,0 +1,66 @@
# coding: utf-8
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
)
class StreamcloudIE(InfoExtractor):
    """Information extractor for streamcloud.eu video pages.

    The landing page only contains a form; its hidden/submit fields are
    posted back to the same URL after a waiting period to obtain the page
    that carries the actual player configuration.
    """
    IE_NAME = u'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        orig_webpage = self._download_webpage(url, video_id)

        # Collect (name, value) pairs of the form's hidden and submit inputs
        fields = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', orig_webpage)
        # The request body has to be bytes on Python 3; urlencode() returns
        # text there, so encode it explicitly.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')

        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(12)
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')
        # Duration and thumbnail are optional (fatal=False)
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        duration = None if duration_str is None else int(duration_str)
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }

View File

@@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
u'info_dict': { u'info_dict': {
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren", u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...', u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
} },
u'skip': u'Service temporarily disabled as of 2013-11-20'
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage') data = self._download_xml(data_url, video_id, 'Downloading data webpage')
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
qualities = ['500k', '480p', '1000k', '720p', '1080p'] qualities = ['500k', '480p', '1000k', '720p', '1080p']
@@ -60,7 +58,7 @@ class TeamcocoIE(InfoExtractor):
return -1 return -1
formats.sort(key=sort_key) formats.sort(key=sort_key)
if not formats: if not formats:
raise RegexNotFoundError(u'Unable to extract video URL') raise ExtractorError(u'Unable to extract video URL')
return { return {
'id': video_id, 'id': video_id,

View File

@@ -4,7 +4,6 @@ import re
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_str,
RegexNotFoundError, RegexNotFoundError,
) )
@@ -43,26 +42,25 @@ class TEDIE(SubtitlesInfoExtractor):
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name)) self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
return [self._playlist_videos_info(url,name,playlist_id)] return [self._playlist_videos_info(url,name,playlist_id)]
def _playlist_videos_info(self,url,name,playlist_id=0):
def _playlist_videos_info(self, url, name, playlist_id):
'''Returns the videos of the playlist''' '''Returns the videos of the playlist'''
video_RE=r'''
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)" webpage = self._download_webpage(
([.\s]*?)data-playlist_item_id="(\d+)" url, playlist_id, u'Downloading playlist webpage')
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)" matches = re.finditer(
''' r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>' webpage)
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
m_names=re.finditer(video_name_RE,webpage)
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
webpage, 'playlist title') webpage, 'playlist title')
playlist_entries = [] playlist_entries = [
for m_video, m_name in zip(m_videos,m_names): self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
talk_url='http://www.ted.com%s' % m_name.group('talk_url') for m in matches
playlist_entries.append(self.url_result(talk_url, 'TED')) ]
return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title) return self.playlist_result(
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
def _talk_info(self, url, video_id=0): def _talk_info(self, url, video_id=0):
"""Return the video for the talk in the url""" """Return the video for the talk in the url"""
@@ -85,7 +83,7 @@ class TEDIE(SubtitlesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'url': stream['file'], 'url': stream['file'],
'format': stream['id'] 'format': stream['id']
} for stream in info['htmlStreams']] } for stream in info['htmlStreams']]
video_id = info['id'] video_id = info['id']
@@ -95,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):
self._list_available_subtitles(video_id, webpage) self._list_available_subtitles(video_id, webpage)
return return
info = { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
@@ -104,11 +102,6 @@ class TEDIE(SubtitlesInfoExtractor):
'formats': formats, 'formats': formats,
} }
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
try: try:
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
@@ -119,6 +112,6 @@ class TEDIE(SubtitlesInfoExtractor):
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
sub_lang_list[l] = url sub_lang_list[l] = url
return sub_lang_list return sub_lang_list
except RegexNotFoundError as err: except RegexNotFoundError:
self._downloader.report_warning(u'video doesn\'t have subtitles') self._downloader.report_warning(u'video doesn\'t have subtitles')
return {} return {}

View File

@@ -0,0 +1,71 @@
# coding: utf-8
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class TouTvIE(InfoExtractor):
    """Information extractor for tou.tv episode pages."""
    IE_NAME = u'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'

    _TEST = {
        u'url': u'http://www.tou.tv/30-vies/S04E41',
        u'file': u'30-vies_S04E41.mp4',
        u'info_dict': {
            u'title': u'30 vies Saison 4 / Épisode 41',
            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
            u'age_limit': 8,
            u'uploader': u'Groupe des Nouveaux Médias',
            u'duration': 1296,
            u'upload_date': u'20131118',
            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
        u'params': {
            u'skip_download': True,  # Requires rtmpdump
        },
        u'skip': 'Only available in Canada'
    }

    def _real_extract(self, url):
        """Return the info dictionary for the episode addressed by *url*.

        Raises ExtractorError when the stream list contains no usable URL
        or when the only stream offered is the "Unavailable" placeholder.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
        streams_doc = self._download_xml(
            streams_url, video_id, note=u'Downloading stream list')

        # Take the first stream that is not an advertisement.  Empty <url>
        # elements are skipped, and an exhausted list is reported as an
        # extraction error instead of leaking a bare StopIteration.
        video_url = next(
            (n.text
             for n in streams_doc.findall('.//choice/url')
             if n.text and u'//ad.doubleclick' not in n.text),
            None)
        if video_url is None:
            raise ExtractorError(u'Unable to extract video URL')
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
                u'Access to this video is blocked from outside of Canada',
                expected=True)

        duration_str = self._html_search_meta(
            'video:duration', webpage, u'duration')
        duration = int(duration_str) if duration_str else None
        upload_date_str = self._html_search_meta(
            'video:release_date', webpage, u'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': video_url,
            'description': self._og_search_description(webpage),
            'uploader': self._dc_search_uploader(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._media_rating_search(webpage),
            'duration': duration,
            'upload_date': upload_date,
            'ext': 'mp4',
        }

View File

@@ -1,6 +1,5 @@
import json import json
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/' format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log) u'video-formats2' % log)
format_str = self._download_webpage( format_doc = self._download_xml(
format_url, video_id, format_url, video_id,
note=u'Downloading formats', note=u'Downloading formats',
errnote=u'Error while downloading formats') errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = ( video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video' u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'

View File

@@ -5,8 +5,6 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urllib_parse,
unescapeHTML,
) )
from ..aes import ( from ..aes import (
aes_decrypt_text aes_decrypt_text

View File

@@ -13,9 +13,10 @@ class TvpIE(InfoExtractor):
u'md5': u'148408967a6a468953c0a75cbdaf0d7a', u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
u'file': u'12878238.wmv', u'file': u'12878238.wmv',
u'info_dict': { u'info_dict': {
u'title': u'31.10.2013', u'title': u'31.10.2013 - Odcinek 2',
u'description': u'31.10.2013', u'description': u'31.10.2013 - Odcinek 2',
}, },
u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -78,12 +78,13 @@ class VevoIE(InfoExtractor):
continue continue
format_url = self._SMIL_BASE_URL + m.group('path') format_url = self._SMIL_BASE_URL + m.group('path')
format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
m.groupdict())
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'), 'format_id': u'SMIL_' + m.group('cbr'),
'format_note': format_note, 'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
'abr': int(m.group('abr')),
'ext': m.group('ext'), 'ext': m.group('ext'),
'width': int(m.group('width')), 'width': int(m.group('width')),
'height': int(m.group('height')), 'height': int(m.group('height')),

View File

@@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id) video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video = config.find('video') video = config.find('video')
sources = video.find('sources') sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class VideoPremiumIE(InfoExtractor): class VideoPremiumIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.tv/(?P<id>\w+)(?:/.*)?' _VALID_URL = r'(?:https?://)?(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
_TEST = { _TEST = {
u'url': u'http://videopremium.tv/4w7oadjsf156', u'url': u'http://videopremium.tv/4w7oadjsf156',
u'file': u'4w7oadjsf156.f4v', u'file': u'4w7oadjsf156.f4v',
@@ -24,12 +24,16 @@ class VideoPremiumIE(InfoExtractor):
webpage_url = 'http://videopremium.tv/' + video_id webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id) webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id) if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,
note=u'Downloading webpage again (with cookie)')
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<', video_title = self._html_search_regex(
webpage, u'video title') r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
return [{ return {
'id': video_id, 'id': video_id,
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16), 'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
'play_path': "mp4:%s.f4v" % video_id, 'play_path': "mp4:%s.f4v" % video_id,
@@ -37,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
'player_url': "http://videopremium.tv/uplayer/uppod.swf", 'player_url': "http://videopremium.tv/uplayer/uppod.swf",
'ext': 'f4v', 'ext': 'f4v',
'title': video_title, 'title': video_title,
}] }

View File

@@ -0,0 +1,101 @@
import re
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for single videos on viki.com."""

    IE_NAME = u'viki'
    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        },
        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
        """Build the info dict for the video behind *url*.

        Metadata is scraped from the video page itself; the media URL, the
        upload date and the subtitle tracks come from a second "player
        fragment" page.  Returns None when the 'listsubtitles' option is set
        (the available subtitles are listed instead).  Raises ExtractorError
        when the fragment page starts with a "video-error" element.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The broadcast network is optional on the page; a miss is not an error.
        network_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        uploader = network_m.group(1).strip() if network_m is not None else None

        # Translate the textual rating into a minimum age; ratings that are
        # missing or not in the table yield None.
        age_by_rating = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        rating = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='')
        age_limit = age_by_rating.get(rating.strip())

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                u'Video %s is blocked from your location.' % video_id,
                expected=True)

        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        created_at = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        if created_at is None:
            upload_date = None
        else:
            upload_date = unified_strdate(created_at)

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Collect the <track> URLs found in *info_webpage*.

        Returns a dict keyed by the run of lowercase letters that precedes
        '.vtt' in each track URL (the 'lang' group); tracks whose URL does
        not contain such a component are ignored.
        """
        tracks = {}
        for escaped_url in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            track_url = unescapeHTML(escaped_url)
            lang_m = re.search(r'/(?P<lang>[a-z]+)\.vtt', track_url)
            if lang_m:
                tracks[lang_m.group('lang')] = track_url
        return tracks

View File

@@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor):
config = json.loads(config_json) config = json.loads(config_json)
except RegexNotFoundError: except RegexNotFoundError:
# For pro videos or player.vimeo.com urls # For pro videos or player.vimeo.com urls
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
webpage, u'info section', flags=re.DOTALL) webpage, u'info section', flags=re.DOTALL)
config = json.loads(config) config = json.loads(config)
except Exception as e: except Exception as e:
@@ -249,25 +249,46 @@ class VimeoChannelIE(InfoExtractor):
IE_NAME = u'vimeo:channel' IE_NAME = u'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"' _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
def _real_extract(self, url): def _extract_videos(self, list_id, base_url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
video_ids = [] video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum), webpage = self._download_webpage(
channel_id, u'Downloading page %s' % pagenum) '%s/videos/page:%d/' % (base_url, pagenum),list_id,
u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break break
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids] for video_id in video_ids]
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id, list_title = self._html_search_regex(self._TITLE_RE, webpage,
webpage, u'channel title') u'list title')
return {'_type': 'playlist', return {'_type': 'playlist',
'id': channel_id, 'id': list_id,
'title': channel_title, 'title': list_title,
'entries': entries, 'entries': entries,
} }
def _real_extract(self, url):
    """Return the playlist of all videos in the channel named in *url*."""
    channel_id = re.match(self._VALID_URL, url).group('id')
    # Paging and title lookup are done by _extract_videos; only the
    # channel-specific base URL is built here.
    base_url = 'http://vimeo.com/channels/%s' % channel_id
    return self._extract_videos(channel_id, base_url)
class VimeoUserIE(VimeoChannelIE):
    """Extract the videos listed on a Vimeo user page as a playlist.

    Paging is inherited from VimeoChannelIE._extract_videos; this class only
    supplies the URL pattern, the title pattern and the base URL of the
    user's page.
    """

    IE_NAME = u'vimeo:user'
    # The dot must be escaped as '\.': the previous 'vimeo.\com' left the dot
    # as a wildcard and used the unknown escape '\c', which the re module
    # rejects ("bad escape") on current Python 3 versions.
    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'

    @classmethod
    def suitable(cls, url):
        """Return True only for URLs no more specific Vimeo extractor claims.

        _VALID_URL accepts any first path component, so channel URLs and
        single-video URLs are handed to VimeoChannelIE / VimeoIE first.
        """
        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url):
            return False
        return super(VimeoUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return the playlist of videos for the user named in *url*."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        return self._extract_videos(name, 'http://vimeo.com/%s' % name)

View File

@@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urllib_parse,
) )
class XTubeIE(InfoExtractor): class XTubeIE(InfoExtractor):

View File

@@ -17,27 +17,21 @@ class YahooIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv', u'file': u'214727115.mp4',
u'md5': u'4962b075c08be8690a922ee026d05e69',
u'info_dict': { u'info_dict': {
u'title': u'Julian Smith & Travis Legg Watch Julian Smith', u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith', u'description': u'Julian and Travis watch Julian Smith',
}, },
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
}, },
{ {
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.flv', u'file': u'103000935.mp4',
u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
u'info_dict': { u'info_dict': {
u'title': u'Codefellas - The Cougar Lies with Spanish Moss', u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
}, },
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
}, },
] ]
@@ -46,15 +40,19 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$', items_json = self._search_regex(r'mediaItems: ({.*?})$',
webpage, u'items', flags=re.MULTILINE) webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json) items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0] info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it # The 'meta' field is not always in the video webpage, we request it
# from another page # from another page
long_id = info['id'] long_id = info['id']
return self._get_info(info['id'], video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id) ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
' AND protocol="http"' % long_id)
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'q': query, 'q': query,
'env': 'prod', 'env': 'prod',
@@ -91,17 +89,39 @@ class YahooIE(InfoExtractor):
formats.append(format_info) formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width'])) formats = sorted(formats, key=lambda f:(f['height'], f['width']))
info = { return {
'id': video_id, 'id': video_id,
'title': meta['title'], 'title': meta['title'],
'formats': formats, 'formats': formats,
'description': clean_html(meta['description']), 'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'], 'thumbnail': meta['thumbnail'],
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
class YahooNewsIE(YahooIE):
    """Extractor for video pages under news.yahoo.com/video/."""

    IE_NAME = 'yahoo:news'
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
    _TEST = {
        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
        u'info_dict': {
            u'id': u'104538833',
            u'ext': u'mp4',
            u'title': u'China Moses Is Crazy About the Blues',
            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }

    # Blank out the test list inherited from YahooIE; this extractor
    # declares its own single _TEST above.
    _TESTS = []

    def _real_extract(self, url):
        """Find the page's content id and delegate to the inherited _get_info."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        # The id in the URL is the short one; the stream lookup is keyed by
        # the "contentId" value embedded in the page.
        content_id = self._search_regex(
            r'contentId: \'(.+?)\',', webpage, u'long id')
        return self._get_info(content_id, video_id)
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):

View File

@@ -11,7 +11,6 @@ import socket
import string import string
import struct import struct
import traceback import traceback
import xml.etree.ElementTree
import zlib import zlib
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
@@ -29,6 +28,7 @@ from ..utils import (
clean_html, clean_html,
get_cachedir, get_cachedir,
get_element_by_id, get_element_by_id,
get_element_by_attribute,
ExtractorError, ExtractorError,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com' IE_DESC = u'YouTube.com'
_VALID_URL = r"""^ _VALID_URL = r"""(?x)^
( (
(?:https?://)? # http(s):// (optional) (?:https?://|//)? # http(s):// or protocol-independent URL (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
tube\.majestyc\.net/| tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls (?:.*?\#/)? # handle anchor (#/) redirect urls
@@ -248,21 +248,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'248': 'webm', '248': 'webm',
} }
_video_dimensions = { _video_dimensions = {
'5': '240x400', '5': '400x240',
'6': '???', '6': '???',
'13': '???', '13': '???',
'17': '144x176', '17': '176x144',
'18': '360x640', '18': '640x360',
'22': '720x1280', '22': '1280x720',
'34': '360x640', '34': '640x360',
'35': '480x854', '35': '854x480',
'36': '240x320', '36': '320x240',
'37': '1080x1920', '37': '1920x1080',
'38': '3072x4096', '38': '4096x3072',
'43': '360x640', '43': '640x360',
'44': '480x854', '44': '854x480',
'45': '720x1280', '45': '1280x720',
'46': '1080x1920', '46': '1920x1080',
'82': '360p', '82': '360p',
'83': '480p', '83': '480p',
'84': '720p', '84': '720p',
@@ -363,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO" u"uploader_id": u"justintimberlakeVEVO"
} }
}, },
{
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
u"file": u"yZIXLfi8CZQ.mp4",
u"note": u"Embed-only video (#1746)",
u"info_dict": {
u"upload_date": u"20120608",
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
u"uploader": u"SET India",
u"uploader_id": u"setindia"
}
},
] ]
@@ -370,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def suitable(cls, url): def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE.""" """Receives a URL and returns True if suitable for this IE."""
if YoutubePlaylistIE.suitable(url): return False if YoutubePlaylistIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None return re.match(cls._VALID_URL, url) is not None
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs) super(YoutubeIE, self).__init__(*args, **kwargs)
@@ -1132,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asrs': 1, 'asrs': 1,
}) })
list_url = caption_url + '&' + list_params list_url = caption_url + '&' + list_params
list_page = self._download_webpage(list_url, video_id) caption_list = self._download_xml(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
original_lang_node = caption_list.find('track') original_lang_node = caption_list.find('track')
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
self._downloader.report_warning(u'Video doesn\'t have automatic captions') self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1272,7 +1283,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# We simulate the access to the video from www.youtube.com/v/{video_id} # We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube # this can be viewed without login into Youtube
data = compat_urllib_parse.urlencode({'video_id': video_id, data = compat_urllib_parse.urlencode({'video_id': video_id,
'el': 'embedded', 'el': 'player_embedded',
'gl': 'US', 'gl': 'US',
'hl': 'en', 'hl': 'en',
'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
@@ -1301,6 +1312,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason') raise ExtractorError(u'"token" parameter not in video info for unknown reason')
if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
else:
view_count = None
# Check for "rental" videos # Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError(u'"rental" videos not supported') raise ExtractorError(u'"rental" videos not supported')
@@ -1489,10 +1505,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'age_limit': 18 if age_gate else 0, 'age_limit': 18 if age_gate else 0,
'annotations': video_annotations, 'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
}) })
return results return results
class YoutubePlaylistIE(InfoExtractor): class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists' IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?: _VALID_URL = r"""(?:
(?:https?://)? (?:https?://)?
@@ -1508,8 +1525,9 @@ class YoutubePlaylistIE(InfoExtractor):
| |
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
)""" )"""
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MAX_RESULTS = 50 _MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist' IE_NAME = u'youtube:playlist'
@classmethod @classmethod
@@ -1517,6 +1535,27 @@ class YoutubePlaylistIE(InfoExtractor):
"""Receives a URL and returns True if suitable for this IE.""" """Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _real_initialize(self):
    """Run the login step (self._login) as this extractor's initialization."""
    self._login()
def _ids_to_results(self, ids):
    """Turn an iterable of video ids into a list of url_result entries.

    Each id is passed both as the URL and as ``video_id``, with 'Youtube'
    named as the extractor to use.
    """
    results = []
    for video_id in ids:
        results.append(self.url_result(video_id, 'Youtube', video_id=video_id))
    return results
def _extract_mix(self, playlist_id):
    """Return the playlist result for a YouTube mix.

    The watch page of the mix is downloaded, the title is read from the
    'title long-title' (or, failing that, 'title ') element, and the video
    ids are collected, without duplicates, from the links that carry this
    playlist id.
    """
    # A mix is generated from a single video: its playlist id is just
    # 'RD' followed by that video's id.
    seed_video_id = playlist_id[2:]
    url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_video_id, playlist_id)
    webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')

    title_span = get_element_by_attribute('class', 'title long-title', webpage)
    if not title_span:
        title_span = get_element_by_attribute('class', 'title ', webpage)
    title = clean_html(title_span)

    video_re = (
        r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s'
        % re.escape(playlist_id))
    ids = orderedSet(re.findall(video_re, webpage))
    entries = self._ids_to_results(ids)
    return self.playlist_result(entries, playlist_id, title)
def _real_extract(self, url): def _real_extract(self, url):
# Extract playlist id # Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1530,45 +1569,33 @@ class YoutubePlaylistIE(InfoExtractor):
video_id = query_dict['v'][0] video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'): if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') return self.url_result(video_id, 'Youtube', video_id=video_id)
else: else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Download playlist videos from API if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
videos = [] # Mixes require a custom extraction process
return self._extract_mix(playlist_id)
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1): for page_num in itertools.count(1):
start_index = self._MAX_RESULTS * (page_num - 1) + 1 url = self._TEMPLATE_URL % (playlist_id, page_num)
if start_index >= 1000:
self._downloader.report_warning(u'Max number of results reached')
break
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
matches = re.finditer(self._VIDEO_RE, page)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids)
try: if re.search(self._MORE_PAGES_INDICATOR, page) is None:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
if 'feed' not in response:
raise ExtractorError(u'Got a malformed response from YouTube API')
playlist_title = response['feed']['title']['$t']
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
break break
for entry in response['feed']['entry']: playlist_title = self._og_search_title(page)
index = entry['yt$position']['$t']
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
videos.append((
index,
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
))
videos = [v[1] for v in sorted(videos)] url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor): class YoutubeChannelIE(InfoExtractor):
@@ -1594,26 +1621,37 @@ class YoutubeChannelIE(InfoExtractor):
# Download channel page # Download channel page
channel_id = mobj.group(1) channel_id = mobj.group(1)
video_ids = [] video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
autogenerated = True
else:
autogenerated = False
# Download all channel pages using the json-based channel_ajax query if autogenerated:
for pagenum in itertools.count(1): # The videos are contained in a single page
url = self._MORE_PAGES_URL % (pagenum, channel_id) # the ajax pages can't be used, they are empty
page = self._download_webpage(url, channel_id, video_ids = self.extract_videos_from_page(channel_page)
u'Downloading page #%s' % pagenum) else:
# Download all channel pages using the json-based channel_ajax query
page = json.loads(page) for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
ids_in_page = self.extract_videos_from_page(page['content_html']) page = self._download_webpage(url, channel_id,
video_ids.extend(ids_in_page) u'Downloading page #%s' % pagenum)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: page = json.loads(page)
break
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] for video_id in video_ids]
return [self.playlist_result(url_entries, channel_id)] return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):
@@ -1677,9 +1715,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE: if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break break
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] self.url_result(video_id, 'Youtube', video_id=video_id)
return [self.playlist_result(url_results, playlist_title = username)] for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)
class YoutubeSearchIE(SearchInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches' IE_DESC = u'YouTube.com searches'
@@ -1720,7 +1760,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n: if len(video_ids) > n:
video_ids = video_ids[:n] video_ids = video_ids[:n]
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(videos, query) return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
@@ -1750,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
""" """
_LOGIN_REQUIRED = True _LOGIN_REQUIRED = True
_PAGING_STEP = 30
# use action_load_personal_feed instead of action_load_system_feed # use action_load_personal_feed instead of action_load_system_feed
_PERSONAL_FEED = False _PERSONAL_FEED = False
@@ -1770,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
feed_entries = [] feed_entries = []
# The step argument is available only in 2.7 or higher paging = 0
for i in itertools.count(0): for i in itertools.count(1):
paging = i*self._PAGING_STEP
info = self._download_webpage(self._FEED_TEMPLATE % paging, info = self._download_webpage(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME, u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i) u'Downloading page %s' % i)
@@ -1780,9 +1819,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html'] feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids) ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) feed_entries.extend(
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in ids)
if info['paging'] is None: if info['paging'] is None:
break break
paging = info['paging']
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1802,9 +1844,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later' _FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = u'Youtube Watch Later' _PLAYLIST_TITLE = u'Youtube Watch Later'
_PAGING_STEP = 100
_PERSONAL_FEED = True _PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's YouTube watch history."""

    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string, like the other feed extractors' patterns: in a plain
    # literal '\.' is an invalid string escape (it only works because Python
    # keeps unknown escapes verbatim, and newer versions warn about it).
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    # The history is a personal feed, not a system feed.
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites' IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'

View File

@@ -1,75 +1,125 @@
# coding: utf-8
import operator
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, unified_strdate,
ExtractorError,
) )
class ZDFIE(InfoExtractor):
    """Extractor for videos of the ZDF Mediathek (www.zdf.de).

    Title, description, duration and the list of formats are read from the
    ``beitragsDetails`` XML service instead of being scraped from the page.
    """
    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
        u"file": u"2037704.webm",
        u"info_dict": {
            u"upload_date": u"20131127",
            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
            u"uploader": u"spezial",
            u"title": u"ZDFspezial - Ende des Machtpokers"
        },
        u"skip": u"Videos on ZDF.de are depublicised in short order",
    }

    def _real_extract(self, url):
        """Return the info dict (metadata plus sorted formats) for *url*.

        Raises ExtractorError if *url* does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('video_id')

        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        doc = self._download_xml(
            xml_url, video_id,
            note=u'Downloading video info',
            errnote=u'Failed to download video info')

        title = doc.find('.//information/title').text
        description = doc.find('.//information/detail').text
        uploader_node = doc.find('.//details/originChannelTitle')
        uploader = None if uploader_node is None else uploader_node.text

        duration_str = doc.find('.//details/length').text
        # The fractional part is optional as a whole: "HH:MM:SS" and
        # "HH:MM:SS.mmm" must both be accepted.
        duration_m = re.match(r'''(?x)^
            (?P<hours>[0-9]{2})
            :(?P<minutes>[0-9]{2})
            :(?P<seconds>[0-9]{2})
            (?:\.(?P<ms>[0-9]+))?
            ''', duration_str)
        if duration_m:
            duration = (
                int(duration_m.group('hours')) * 60 * 60 +
                int(duration_m.group('minutes')) * 60 +
                int(duration_m.group('seconds')))
        else:
            duration = None
        upload_date = unified_strdate(doc.find('.//details/airtime').text)

        PROTO_ORDER = ['http', 'rtmp', 'rtsp']
        QUALITY_ORDER = ['veryhigh', '300', 'high', 'med', 'low']
        # Known values rank from 0 (best) downwards; the list is sorted in
        # ascending order, so unknown values need a very low rank.
        UNKNOWN_PREF = -999

        def rank(order, value):
            """Return the sort rank of value within order (higher is better)."""
            try:
                return -order.index(value)
            except ValueError:
                return UNKNOWN_PREF

        def xml_to_format(fnode):
            """Build a format dict from a <formitaet> node.

            Returns None when the node's basetype cannot be parsed.
            """
            video_url = fnode.find('url').text
            is_available = u'http://www.metafilegenerator' not in video_url

            format_id = fnode.attrib['basetype']
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
                ''', format_id)
            if format_m is None:
                # Unknown basetype layout: skip this format instead of
                # failing the whole extraction with an AttributeError.
                return None

            ext = format_m.group('container')
            is_supported = ext != 'f4f'

            proto_pref = rank(PROTO_ORDER, format_m.group('proto'))
            quality = fnode.find('./quality').text
            quality_pref = rank(QUALITY_ORDER, quality)

            abr = int(fnode.find('./audioBitrate').text) // 1000
            vbr = int(fnode.find('./videoBitrate').text) // 1000
            pref = (is_available, is_supported,
                    proto_pref, quality_pref, vbr, abr)

            return {
                'format_id': format_id + u'-' + quality,
                'url': video_url,
                'ext': ext,
                'acodec': format_m.group('acodec'),
                'vcodec': format_m.group('vcodec'),
                'abr': abr,
                'vbr': vbr,
                'width': int(fnode.find('./width').text),
                'height': int(fnode.find('./height').text),
                'filesize': int(fnode.find('./filesize').text),
                'format_note': None if is_supported else u'(unsupported)',
                '_pref': pref,
                '_available': is_available,
            }

        format_nodes = doc.findall('.//formitaeten/formitaet')
        candidates = (xml_to_format(fnode) for fnode in format_nodes)
        formats = sorted(
            (f for f in candidates if f is not None and f['_available']),
            key=operator.itemgetter('_pref'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'duration': duration,
            'upload_date': upload_date,
        }

View File

@@ -2,11 +2,15 @@ import io
import json import json
import traceback import traceback
import hashlib import hashlib
import os
import subprocess import subprocess
import sys import sys
from zipimport import zipimporter from zipimport import zipimporter
from .utils import * from .utils import (
compat_str,
compat_urllib_request,
)
from .version import __version__ from .version import __version__
def rsa_verify(message, signature, key): def rsa_verify(message, signature, key):
@@ -37,6 +41,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False if signature != sha256(message).digest(): return False
return True return True
def update_self(to_screen, verbose): def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository""" """Update the program file with the latest version from the repository"""
@@ -78,6 +83,13 @@ def update_self(to_screen, verbose):
return return
version_id = versions_info['latest'] version_id = versions_info['latest']
def version_tuple(version_str):
return tuple(map(int, version_str.split('.')))
if version_tuple(__version__) >= version_tuple(version_id):
to_screen(u'youtube-dl is up to date (%s)' % __version__)
return
to_screen(u'Updating to version ' + version_id + '...') to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id] version = versions_info['versions'][version_id]
@@ -105,7 +117,7 @@ def update_self(to_screen, verbose):
urlh = compat_urllib_request.urlopen(version['exe'][0]) urlh = compat_urllib_request.urlopen(version['exe'][0])
newcontent = urlh.read() newcontent = urlh.read()
urlh.close() urlh.close()
except (IOError, OSError) as err: except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version') to_screen(u'ERROR: unable to download latest version')
return return
@@ -118,7 +130,7 @@ def update_self(to_screen, verbose):
try: try:
with open(exe + '.new', 'wb') as outf: with open(exe + '.new', 'wb') as outf:
outf.write(newcontent) outf.write(newcontent)
except (IOError, OSError) as err: except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to write the new version') to_screen(u'ERROR: unable to write the new version')
return return
@@ -137,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
subprocess.Popen([bat]) # Continues to run in the background subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages return # Do not show premature success messages
except (IOError, OSError) as err: except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version') to_screen(u'ERROR: unable to overwrite current version')
return return
@@ -148,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
urlh = compat_urllib_request.urlopen(version['bin'][0]) urlh = compat_urllib_request.urlopen(version['bin'][0])
newcontent = urlh.read() newcontent = urlh.read()
urlh.close() urlh.close()
except (IOError, OSError) as err: except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to download latest version') to_screen(u'ERROR: unable to download latest version')
return return
@@ -161,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
try: try:
with open(filename, 'wb') as outf: with open(filename, 'wb') as outf:
outf.write(newcontent) outf.write(newcontent)
except (IOError, OSError) as err: except (IOError, OSError):
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version') to_screen(u'ERROR: unable to overwrite current version')
return return

View File

@@ -8,13 +8,16 @@ import gzip
import io import io
import json import json
import locale import locale
import math
import os import os
import pipes import pipes
import platform import platform
import re import re
import ssl
import socket import socket
import sys import sys
import traceback import traceback
import xml.etree.ElementTree
import zlib import zlib
try: try:
@@ -535,17 +538,34 @@ def formatSeconds(secs):
else: else:
return '%d' % secs return '%d' % secs
def make_HTTPS_handler(opts_no_check_certificate):
    """Return an urllib HTTPS handler that tries SSLv3 first.

    On Python < 3.2 a custom connection class wraps the socket with SSLv3
    and retries with SSLv23 if that raises ssl.SSLError; the argument is not
    consulted on this branch.  On newer versions an SSLContext is used and
    certificate verification is disabled when opts_no_check_certificate is
    true, required otherwise.
    """
    if sys.version_info < (3, 2):
        try:
            import httplib
        except ImportError:  # Python 3.0 / 3.1 name of the module
            import http.client as httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                # _tunnel_host is a private httplib attribute; use getattr
                # so a missing attribute is treated as "no tunnel".
                if getattr(self, '_tunnel_host', None):
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)

        return HTTPSHandlerV3()
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.set_default_verify_paths()
        if opts_no_check_certificate:
            context.verify_mode = ssl.CERT_NONE
        else:
            context.verify_mode = ssl.CERT_REQUIRED
        return compat_urllib_request.HTTPSHandler(context=context)
@@ -734,6 +754,8 @@ def unified_strdate(date_str):
'%Y/%m/%d %H:%M:%S', '%Y/%m/%d %H:%M:%S',
'%d.%m.%Y %H:%M', '%d.%m.%Y %H:%M',
'%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S',
] ]
for expression in format_expressions: for expression in format_expressions:
@@ -949,7 +971,16 @@ class locked_file(object):
def shell_quote(args):
    """Return *args* joined into one shell-escaped command line string.

    Byte strings (e.g. filenames produced by 'encodeFilename') are decoded
    with the filesystem encoding, falling back to UTF-8, before quoting.
    """
    # shlex.quote is the documented function; pipes.quote is only kept as a
    # fallback for interpreters that do not provide it.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'

    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(quote(a))
    return u' '.join(quoted_args)
def takewhile_inclusive(pred, seq): def takewhile_inclusive(pred, seq):
@@ -976,3 +1007,17 @@ def unsmuggle_url(smug_url):
jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
data = json.loads(jsond) data = json.loads(jsond)
return url, data return url, data
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary suffix.

    bytes may be a number, a numeric string, or None.  Returns u'N/A' for
    None, otherwise the value scaled to the largest fitting unit with two
    decimals, e.g. u'1.50KiB'.  Values below one byte are reported in B and
    values beyond the last unit are reported in YiB.
    """
    if bytes is None:
        return u'N/A'
    if isinstance(bytes, (str, type(u''))):
        bytes = float(bytes)

    suffixes = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    size = float(bytes)

    # Repeated division keeps the exponent inside the suffix table and avoids
    # math.log, which fails for negative input and yields a negative exponent
    # (and therefore a wrong suffix) for tiny positive input.
    magnitude = abs(size)
    exponent = 0
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1

    converted = size / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffixes[exponent])

View File

@@ -1,2 +1,2 @@
__version__ = '2013.11.15' __version__ = '2013.12.02'