Compare commits

..

191 Commits

Author SHA1 Message Date
Philipp Hagemeister
19b05d886e release 2015.01.03 2015-01-03 18:35:30 +01:00
Philipp Hagemeister
e65566a9cc [youtube] Correct handling when DASH manifest is not necessary to find all formats 2015-01-03 18:33:38 +01:00
Sergey M․
baa3c3f0f6 [ellentv] Improve extraction 2015-01-03 21:54:18 +06:00
Sergey M․
f4f339529c [ellentv] Clean up and simplify 2015-01-03 21:44:47 +06:00
Sergey M.
7d02fae85b Merge pull request #4626 from gauravb7090/ellentube
Added support for EllenTube along with EllenTV
2015-01-03 21:40:39 +06:00
Gaurav
6e46c3f1fd Added support for EllenTube along with EllenTV 2015-01-03 20:30:28 +05:30
Sergey M․
c7e675940c [bbccouk] Add support for music clips (Closes #4143) 2015-01-03 20:43:40 +06:00
Jaime Marquínez Ferrándiz
d26b1317ed [downloader/mplayer] Use check_executable 2015-01-03 00:33:36 +01:00
Jaime Marquínez Ferrándiz
a221f22969 [crunchyroll] Fix format extraction
Reported in https://github.com/rg3/youtube-dl/issues/2782#issuecomment-68556780
2015-01-02 21:17:10 +01:00
Jaime Marquínez Ferrándiz
817f786fbb [canalplus] Raise an error if the video is georestricted (closes #4472) 2015-01-02 21:02:34 +01:00
Sergey M․
62420c73cb [played] Skip test 2015-01-02 22:31:55 +06:00
Sergey M․
2522a0b7da [kontrtube] Extract display_id
Trailing slash in URL is mandatory now
2015-01-02 22:28:48 +06:00
Sergey M․
46d32a12c9 [bet] Update test 2015-01-02 22:23:00 +06:00
Sergey M․
c491418526 [bbccouk] Update test 2015-01-02 22:13:26 +06:00
Sergey M․
823a155293 [vier:videos] Tune _VALID_URL not to match single videos 2015-01-02 22:09:00 +06:00
Sergey M․
324b2c78fa [xtube] Fix uploader regex 2015-01-02 21:46:57 +06:00
Sergey M․
d34f98289b [xhamster] Remove identical tests 2015-01-02 21:12:25 +06:00
Sergey M.
644096b15c Merge pull request #4615 from dwemthy/https_xhamster
[xhamster] Add HTTPS support
2015-01-02 21:09:28 +06:00
Sergey M․
15cebcc363 Merge branch 'master' of github.com:rg3/youtube-dl 2015-01-02 20:57:12 +06:00
Sergey M․
faa4ea68c0 [generic] Add BBC iPlayer playlist test 2015-01-02 20:56:42 +06:00
Philipp Hagemeister
29a9385ff0 release 2015.01.02 2015-01-02 15:56:26 +01:00
Sergey M․
476eae0c2a [generic] Generalize BBC iPlayer playlist extraction 2015-01-02 20:55:09 +06:00
Sergey M․
8399267671 [generic] Make getter None by default 2015-01-02 20:54:30 +06:00
Sergey M․
db546cf87f [generic] Add support for BBC iPlayer embeds (Closes #4619) 2015-01-02 20:46:17 +06:00
Sergey M․
317639758a [bbccouk] Improve _VALID_URL 2015-01-02 20:37:54 +06:00
Sergey M․
fdbabca85f [vier:videos] Tune _VALID_URL 2015-01-02 20:21:41 +06:00
Sergey M․
6f790e5821 Credit @lovebug356 for vier (#4617) 2015-01-02 20:16:43 +06:00
Sergey M․
6f5cdeb611 Merge branch 'lovebug356-vier' 2015-01-02 20:15:59 +06:00
Sergey M․
9eb4f404cb [vier] Simplify, add support for more URL formats, extract all playlist pages when page is not specified 2015-01-02 20:15:40 +06:00
Thijs Vermeir
f58487b392 [vier] Add new extractor 2015-01-02 13:35:47 +01:00
dwemthy
5b9aefef77 [xhamster] Add HTTPS support 2015-01-02 11:54:38 +00:00
Philipp Hagemeister
772fd5cc44 [youtube] Add a pseudo-extractor for truncated YouTube video IDs (#4610) 2015-01-01 23:44:39 +01:00
Philipp Hagemeister
50a0f6df7e [/__init__] Add another cute search example 2015-01-01 22:47:21 +01:00
Philipp Hagemeister
9f435c5f1c Add an extractor for common mistakes (#4610) 2015-01-01 22:34:58 +01:00
Philipp Hagemeister
931e2d1d26 [bbccouk] PEP8 2015-01-01 22:15:46 +01:00
Philipp Hagemeister
a42419da42 [options] Upper-case options and URL in --help output
Hopefully, this reduces confusion as in #4610.
2015-01-01 22:01:47 +01:00
Philipp Hagemeister
9a237b776c release 2015.01.01 2015-01-01 21:41:42 +01:00
Sergey M․
02ec32a1ef [ceskatelevize] Adapt to new API (Closes #4531) 2015-01-01 20:01:55 +06:00
Sergey M․
a1e9e6440f [moevideo] Skip removed video test 2015-01-01 00:46:03 +06:00
Sergey M․
5878e6398c [nrktv] Update tests' checksums 2015-01-01 00:37:57 +06:00
Sergey M․
6c6f1408f2 [extractor/common] Allow multiline content tags 2015-01-01 00:37:14 +06:00
Sergey M․
b7a7319c38 [slideshare] Fix extraction 2015-01-01 00:26:19 +06:00
Sergey M․
68f705cac5 [tnaflix] Make sure config URL has correct scheme 2015-01-01 00:12:41 +06:00
Sergey M․
079d1dcd80 [tnaflix] Fix title extraction 2015-01-01 00:11:56 +06:00
Sergey M․
7b24bbdf49 [xboxclips] Fix extraction 2014-12-31 23:59:16 +06:00
Jaime Marquínez Ferrándiz
f86d543ebb [pbs] Catch geoblocking errors (closes #4516) 2014-12-31 17:43:49 +01:00
Jaime Marquínez Ferrándiz
60e47a2699 [youtube] Use '_download_xml' for getting the available subtitles 2014-12-31 15:44:15 +01:00
Sergey M․
b8bc7a696b [openfilm] Add extractor (Closes #4538) 2014-12-31 19:40:35 +06:00
Jaime Marquínez Ferrándiz
7d900ef1bf [youtube] Add support for automatically translated subtitles (fixes #4555)
They have a manually uploaded subtitles track and YouTube can translate it.
2014-12-31 14:15:16 +01:00
Sergey M․
1931a73f39 [echomsk] Add extractor (Closes #4600) 2014-12-31 18:03:51 +06:00
Philipp Hagemeister
966ea3aebd [README] Typo / clarify FAQ 2014-12-30 23:41:29 +01:00
Philipp Hagemeister
b3013681ff Merge remote-tracking branch 'origin/master' 2014-12-30 19:41:04 +01:00
Philipp Hagemeister
416c7fcbce Add documentation about supported sites (Fixes #4503) 2014-12-30 19:35:35 +01:00
Sergey M․
e83eebb12f [atresplayer] Fix python3 bug 2014-12-30 22:46:23 +06:00
Sergey M․
a349873226 [atresplayer] Add extractor (Closes #2341) 2014-12-30 22:28:07 +06:00
Sergey M․
fccae2b911 [youtube] Add test for age-gate video with encrypted signature 2014-12-30 17:26:21 +06:00
Sergey M․
3ee08848db Credit @0xced for #4598 2014-12-30 17:12:12 +06:00
Sergey M.
0129b4dd45 Merge pull request #4598 from 0xced/encrypted-age-gate
[youtube] Fix videos with age gate and encrypted signatures
2014-12-30 17:09:02 +06:00
Sergey M․
1c57e7f1f4 [daum] Improve full_id regex 2014-12-30 16:55:53 +06:00
Sergey M.
d0caf3a11e Merge pull request #4599 from t0mm0/daum_fix
[daum] update 'full id' regex
2014-12-30 16:52:02 +06:00
t0mm0
a87bb090d9 [daum] update 'full id' regex
fixes #4566
2014-12-29 23:06:56 +00:00
Cédric Luthi
beb95e7781 [youtube] Fix videos with age gate and encrypted signatures
The `sts` value is available on the embed webpage, get it from there.

Fixes #4108.
2014-12-29 22:58:14 +01:00
Sergey M․
5435d7af91 Merge branch 't0mm0-hitbox' 2014-12-30 03:22:25 +06:00
Sergey M․
0c0a70f4c6 [hitbox] Minor changes 2014-12-30 03:22:07 +06:00
t0mm0
e3947e2b7f [hitbox] add support for live streams 2014-12-29 20:12:23 +00:00
t0mm0
da3f7fb7f8 [hitbox] add extractor for hitbox vods 2014-12-29 20:12:23 +00:00
Sergey M․
429ddfd38d [cnn] Add support for hln URL format (Closes #4595) 2014-12-30 01:50:28 +06:00
Sergey M․
479514d015 Merge branch 'peugeot-hellporno' 2014-12-29 21:33:57 +06:00
Sergey M․
355e41466d [hellporno] Extract all formats and improve 2014-12-29 21:33:41 +06:00
Sergey M․
03d9aad87c Merge branch 'hellporno' of https://github.com/peugeot/youtube-dl into peugeot-hellporno 2014-12-29 21:13:09 +06:00
Sergey M․
3e2bcf530b Merge branch 'peugeot-xxxymovies' 2014-12-29 21:05:41 +06:00
Sergey M․
6343a5f68e [xxxymovies] Improve 2014-12-29 21:05:21 +06:00
Sergey M․
00de9a9828 Merge branch 'xxxymovies' of https://github.com/peugeot/youtube-dl into peugeot-xxxymovies 2014-12-29 20:38:28 +06:00
Sergey M․
7fc2cd819e [cnn] Improve regexes and fix test 2014-12-29 20:27:09 +06:00
Sergey M.
974739aab5 Merge pull request #4543 from akretz/cnn_fix
[cnn] Add support for articles with videos (fixes #4541)
2014-12-29 20:21:39 +06:00
peugeot
0cc4f8e385 [xxxymovies] new ectractor 2014-12-29 11:31:22 +01:00
peugeot
513fd2a872 [hellporno] simplify 2014-12-29 10:38:07 +01:00
Sergey M․
ae6986fb89 [bbccouk] Switch to new JSON playlist format (Closes #4588) 2014-12-29 03:00:24 +06:00
Sergey M․
e8e28989eb [archiveorg] Add test, simplify and modernize 2014-12-29 02:08:46 +06:00
Sergey M.
0fa629d05b Merge pull request #4590 from derrotebaron/master
[archiveorg] most metadata fields are optional
2014-12-29 01:53:59 +06:00
Johannes Knoedtel
ff7a07d5c4 [archiveorg] most metadata fields are optional
Example: https://archive.org/details/Cops1922
2014-12-28 20:31:25 +01:00
Sergey M․
5a18403057 [arte.tv] Fix typo 2014-12-28 15:42:29 +06:00
Sergey M․
1b7b1d6eac [arte.tv:+7] Make quality optional (Closes #4583) 2014-12-28 15:41:52 +06:00
Sergey M․
23cfa4ae45 Merge branch 'peugeot-alphaporno' 2014-12-27 00:08:25 +06:00
Sergey M․
e82def52a9 [alphaporno] Improve 2014-12-27 00:08:04 +06:00
Sergey M․
bcfe9db299 Merge branch 'alphaporno' of https://github.com/peugeot/youtube-dl into peugeot-alphaporno 2014-12-26 23:34:12 +06:00
Sergey M․
cf00ae7640 Merge branch 'peugeot-eroprofile' 2014-12-26 23:33:01 +06:00
Sergey M․
f9b9e88646 [eroprofile] Simplify 2014-12-26 23:32:41 +06:00
Sergey M․
c2500434c3 Merge branch 'eroprofile' of https://github.com/peugeot/youtube-dl into peugeot-eroprofile 2014-12-26 23:16:25 +06:00
Sergey M․
f74b341dde expect_info_dict actual-expected argument consistency 2014-12-26 23:07:24 +06:00
peugeot
461b00f34a [eroprofile] new extractor 2014-12-26 17:15:34 +01:00
peugeot
4cda41ac7b [alphaporno] new extractor 2014-12-26 16:17:35 +01:00
peugeot
6a1c4fbfcb [hellporno] new extractor 2014-12-26 15:49:12 +01:00
Sergey M․
31424c126f [sunporno] Modernize 2014-12-26 19:28:51 +06:00
Sergey M.
53096539dc Merge pull request #4568 from peugeot/sunporno
[sunporno] fix duration
2014-12-26 19:25:05 +06:00
peugeot
2c0b475235 [sunporno] fix duration 2014-12-26 12:49:13 +01:00
Sergey M․
a542405200 Credit @MaxReimann for teletask (#4533) 2014-12-25 23:29:10 +06:00
Sergey M․
3e2b085ef9 Merge branch 'MaxReimann-teletask' 2014-12-25 23:27:23 +06:00
Sergey M․
885e4384a1 [teletask] Simplify 2014-12-25 23:26:57 +06:00
Sergey M․
2b8f151094 Merge branch 'teletask' of https://github.com/MaxReimann/youtube-dl into MaxReimann-teletask 2014-12-25 23:06:26 +06:00
Sergey M․
5ac71f0b27 [sohu] Modernize and extract all formats and more metadata (Closes #4409, closes #2056, closes #3009) 2014-12-25 22:25:05 +06:00
Sergey M․
39ac7c9435 [gameone] Extract duration as float 2014-12-24 19:18:59 +06:00
Sergey M.
ed7bdc8a90 Merge pull request #4553 from tobidope/gameone
[gameone] This fix resolves issue #4552
2014-12-24 19:05:05 +06:00
Tobias Bell
55f0cab3a3 [gameone] This fix resolves issue #4552
The duration metadata for certain episodes contained floating point
numbers instead of integers. Now only the integer part will be
interpreted. Also added a test for this
2014-12-23 22:09:21 +01:00
Sergey M․
544dec6298 [smotri] Skip broken tests 2014-12-23 20:33:56 +06:00
Jaime Marquínez Ferrándiz
e0ae1814b1 [sportdeutschland] Fix extraction (fixes #4544) 2014-12-22 22:24:19 +01:00
Adrian Kretz
9532d72371 [cnn] Add support for articles with videos (fixes #4541) 2014-12-22 18:40:36 +01:00
Sergey M․
1362bbbb4b [adobetv] Add extractor (Closes #4536) 2014-12-22 22:05:47 +06:00
Jaime Marquínez Ferrándiz
f00fd51dae Don't write the description file if info_dict['description'] is None (#3166) 2014-12-21 20:49:14 +01:00
Sergey M․
a8896c5ac2 [crunchyroll] Add .fr domain (#4537) 2014-12-22 00:58:15 +06:00
Jaime Marquínez Ferrándiz
5d3808524d [extractor/common] Update docstring: replace FileDownloader with YoutubeDL 2014-12-21 16:58:29 +01:00
Jaime Marquínez Ferrándiz
c8f167823f [dbtv] Make sure the 'id' field is a string 2014-12-21 16:57:07 +01:00
Jaime Marquínez Ferrándiz
70f6796e7d [telecinco] Rename 'episode' group to 'id' in the _VALID_URL regex
MiTeleIE now uses '_match_id'
2014-12-21 16:54:53 +01:00
Jaime Marquínez Ferrándiz
85d253af6b [internetvideoarchive] Update test's duration field 2014-12-21 15:37:21 +01:00
Jaime Marquínez Ferrándiz
a86cbf5876 [rtp] Fix test's id field 2014-12-21 15:28:40 +01:00
Jaime Marquínez Ferrándiz
3f1399de8a [tmz] Fix test's thumbnail field 2014-12-21 15:26:00 +01:00
Jaime Marquínez Ferrándiz
1f809a8560 [nerdcubed] Style fixes 2014-12-21 15:22:30 +01:00
Jaime Marquínez Ferrándiz
653d14e2f9 [yahoo] Update extraction process
Their webpage uses now https://video.media.yql.yahoo.com/v1/video/sapi/streams/ for getting the video info.
2014-12-21 14:47:44 +01:00
Jaime Marquínez Ferrándiz
85fab7e47b [yahoo] Replace two tests
The first one returned an internal server error.
The other doesn't seem to contain a video anymore.
2014-12-21 14:47:12 +01:00
Jaime Marquínez Ferrándiz
3aa9176f08 [yahoo] Improve video id detection (fixes #4521) 2014-12-21 14:09:00 +01:00
MaxReimann
33b53b6021 [teletask] Add new extractor 2014-12-21 12:26:47 +01:00
MaxReimann
3f7421b71b fix test and remove lengthy description 2014-12-21 11:13:59 +01:00
MaxReimann
ee45625290 Add extractor for teletask 2014-12-21 11:01:28 +01:00
Sergey M․
2c2a42587b [dvtv] Fix thumbnail scheme 2014-12-21 07:38:55 +06:00
Sergey M․
e2f65efcf9 Merge branch 'petrkutalek-dvtv' 2014-12-21 07:34:27 +06:00
Sergey M․
081d6e4784 [dvtv] Simplify 2014-12-21 07:33:58 +06:00
Petr Kutalek
1d4247f64e [dvtv] Add support for playlists 2014-12-21 01:24:05 +01:00
Sergey M․
1ff30d7b79 [npo] Add support for streams (Closes #4276) 2014-12-20 18:30:56 +06:00
Sergey M․
16ea817968 [xtube] Fix and modernize (Closes #4489) 2014-12-19 21:56:44 +06:00
Philipp Hagemeister
a2a4bae929 Credit @willglynn for nerdcubed (#4515) 2014-12-19 10:32:20 +01:00
Will Glynn
c58843b3a1 [nerdcubed] Add new extractor
nerdcubed.co.uk describes videos in a single feed.json file, providing
references to and metadata on >1300 YouTube videos spread across 3 main
channels as well as guest appearances on other channels via a single HTTP
request.

NerdCubedFeedIE transforms this feed into a youtube-dl playlist, preserving
information present in the upstream JSON (allowing zero-cost title/date
matches) and ultimately referencing the embedded YouTube videos.
2014-12-18 22:32:24 -06:00
Sergey M․
a22524b004 [twitch] Add support for vods (Closes #4512) 2014-12-18 21:25:42 +06:00
Philipp Hagemeister
87c4c21e75 Credit @petrkutalek for dvtv (#4502) 2014-12-17 23:38:11 +01:00
Philipp Hagemeister
b9465395cb [dvtv] PEP8 and correct format sorting (#4502) 2014-12-17 23:18:06 +01:00
Philipp Hagemeister
edf41477f0 Merge remote-tracking branch 'petrkutalek/dvtv' 2014-12-17 23:12:38 +01:00
Petr Kutalek
5f627b4448 [dvtv] Add new extractor 2014-12-17 15:52:54 +01:00
Philipp Hagemeister
60e5428925 [flake8] Ignore build/ directory
That directory is temporarily generated when building for PyPi and may be present if something goes wrong with the upload.
2014-12-17 15:36:18 +01:00
Sergey M․
748ec66725 [theplatform] Extract captions (Closes #4495) 2014-12-17 20:20:40 +06:00
Jaime Marquínez Ferrándiz
e54a3a2f01 [screencastomatic] Remove unused variable 2014-12-17 14:56:30 +01:00
Jaime Marquínez Ferrándiz
0e4cb4f406 YoutubeDL: style fix 2014-12-17 14:55:27 +01:00
Philipp Hagemeister
f7ffe72ac7 Merge pull request #4501 from AndroKev/master
only add video-id to archive, when successful
2014-12-17 13:31:33 +01:00
AndroKev
cd58dc3e56 Update YoutubeDL.py 2014-12-17 13:21:22 +01:00
AndroKev
c33bcf2051 only add video-id to archive, when successful
Example:
no space left--> youtube-dl adds the id to archive, but the video isn't fully downloaded
2014-12-17 13:05:19 +01:00
Philipp Hagemeister
7642c08763 release 2014.12.17.2 2014-12-17 11:39:25 +01:00
Philipp Hagemeister
fdc8000810 [downloader] Handle a file ./- (Fixes #4498) 2014-12-17 11:39:06 +01:00
Philipp Hagemeister
a91c9b15e3 release 2014.12.17.1 2014-12-17 11:29:52 +01:00
Philipp Hagemeister
27d67ea2ba [comedycentral] Match URLs with a second ID (fixes #4499) 2014-12-17 11:29:35 +01:00
Philipp Hagemeister
d6a8160902 release 2014.12.17 2014-12-17 10:53:17 +01:00
Philipp Hagemeister
6e1b9395c6 [screencastomatic] Add new extractor (Fixes #4497) 2014-12-17 10:53:12 +01:00
Philipp Hagemeister
b1ccbed3d4 [nhl] Allow upper-case video IDs (Fixes #4494) 2014-12-17 00:26:04 +01:00
Philipp Hagemeister
37381350f8 [aljazeera] Add unicode_literals marker 2014-12-17 00:08:04 +01:00
Philipp Hagemeister
7af808a5ef Improve code style 2014-12-17 00:06:41 +01:00
Philipp Hagemeister
876bef5937 [mit] Modernize 2014-12-17 00:04:24 +01:00
Jaime Marquínez Ferrándiz
a16af51873 flake8: Add more ignored files
* setup.py: the '__version__' variable is not defined in the script, it is loaded from youtube_dl/version.py
* devscripts/buildserver.py: Produces a lot of messages
2014-12-16 20:38:59 +01:00
Jaime Marquínez Ferrándiz
dc9a441bfa Move flake8 configuration to setup.cfg
It will be used when calling flake8 from any directory in the project
2014-12-16 20:34:07 +01:00
Jaime Marquínez Ferrándiz
ee6dfe8308 Use flake8 instead of pyflakes and pep8
It combines both tools
2014-12-16 20:34:07 +01:00
Jaime Marquínez Ferrándiz
2cb5b03e53 [test/test_unicode_literals] Remove duplicated imports 2014-12-16 20:33:23 +01:00
Philipp Hagemeister
964b190350 release 2014.12.16.2 2014-12-16 16:45:35 +01:00
Philipp Hagemeister
13d27a42cc [orf:tvthek] Add support for topic URLs (Fixes #4474) 2014-12-16 16:45:28 +01:00
Philipp Hagemeister
ec05fee43a [brightcove] Add shorter URL scheme for other extractors 2014-12-16 16:38:26 +01:00
Philipp Hagemeister
b50e3bc67f [README] Add table of contents (Closes #4458) 2014-12-16 16:33:23 +01:00
Philipp Hagemeister
ac78b5e97b release 2014.12.16.1 2014-12-16 16:03:57 +01:00
Philipp Hagemeister
17e0d63957 Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-16 16:03:46 +01:00
Sergey M․
9209fe3878 [allocine] Add test for new URL format 2014-12-16 21:03:10 +06:00
Philipp Hagemeister
84d84211ac [youtube:feeds] (Fixes #4486) 2014-12-16 15:59:31 +01:00
Sergey M.
b4116dcdd5 Merge pull request #4490 from Tailszefox/master
[Allocine] Support for more URLs
2014-12-16 20:59:07 +06:00
Jaime Marquínez Ferrándiz
bb18d787b5 [aljazeera] Add extractor (closes #4487) 2014-12-16 15:48:01 +01:00
Tailszefox
0647084f39 [Allocine] Support for more URLs 2014-12-16 15:46:04 +01:00
Philipp Hagemeister
734ea11e3c Drop hash character in downloader output (#4484) 2014-12-16 00:37:42 +01:00
Philipp Hagemeister
3940450878 release 2014.12.16 2014-12-16 00:24:30 +01:00
Philipp Hagemeister
ccbfaa83b0 [devscripts/make_contributing] Switch to optparse (Fixes #4483) 2014-12-16 00:24:11 +01:00
Philipp Hagemeister
d86007873e [YoutubeDL] Document where details for format can be found 2014-12-16 00:22:23 +01:00
Jaime Marquínez Ferrándiz
4b7df0d30c [youtube:playlist] Work around buggy playlists (fixes #4449)
They show a "Load more" button, but they don't have more videos.
The continuation url in the json file was a link to itself, so we ended up in an infinite loop.
2014-12-15 19:19:15 +01:00
Philipp Hagemeister
caff59499c [README] Fix code rendering 2014-12-15 11:14:06 +01:00
Philipp Hagemeister
99a0f9824a [README] Highlight code examples 2014-12-15 11:11:52 +01:00
Jaime Marquínez Ferrándiz
3013bbb27d Remove unused imports 2014-12-15 08:24:50 +01:00
Naglis Jonaitis
6f9b54933f [streamcloud] Modernize 2014-12-15 03:32:17 +02:00
Naglis Jonaitis
1bbe317508 [mooshare] Modernize 2014-12-15 03:31:54 +02:00
Philipp Hagemeister
e97a534f13 release 2014.12.15 2014-12-15 01:36:46 +01:00
Philipp Hagemeister
8acb83d993 [README] Make example audio sound not that horrible ;) 2014-12-15 01:34:39 +01:00
Philipp Hagemeister
71b640cc5b [YoutubeDL] Add declarative version of progress hooks 2014-12-15 01:26:20 +01:00
Philipp Hagemeister
4f026fafbc [YoutubeDL] Make postprocessors declarative
Instead of having to configure PPs in code, this allows us and embedding programs not to worry about imports or finer details, similarly to how we handle IEs.
2014-12-15 01:06:25 +01:00
Philipp Hagemeister
39f594d660 [Makefile] Ensure that offline test really is offline 2014-12-15 00:59:23 +01:00
Philipp Hagemeister
cae97f6521 Improve and test ffmpeg version detection 2014-12-14 21:59:59 +01:00
Philipp Hagemeister
6cbf345f28 Remove test/write_info_json
This is now covered by every single test_download testcase anyways :)
2014-12-14 21:56:12 +01:00
Philipp Hagemeister
a0ab29f8a1 Add offlinetest make target 2014-12-14 21:55:57 +01:00
Naglis Jonaitis
4a4fbfc967 [yesjapan] Look for datetime inside submit_info
Oops..
2014-12-14 18:03:05 +02:00
Naglis Jonaitis
408b5839b1 [yesjapan] Add new extractor (Closes #4466) 2014-12-14 17:59:25 +02:00
Philipp Hagemeister
60620368d7 [youtube] Fix player ID detection 2014-12-14 00:43:34 +01:00
Philipp Hagemeister
4927de4f86 release 2014.12.14 2014-12-14 00:13:17 +01:00
Philipp Hagemeister
bad5c1a303 [rtp] Also match e-id-less URLs (#4382) 2014-12-14 00:13:07 +01:00
90 changed files with 2807 additions and 548 deletions

View File

@@ -93,3 +93,8 @@ Zack Fernandes
cryptonaut cryptonaut
Adrian Kretz Adrian Kretz
Mathias Rav Mathias Rav
Petr Kutalek
Will Glynn
Max Reimann
Cédric Luthi
Thijs Vermeir

View File

@@ -1,4 +1,4 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean: clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
@@ -35,13 +35,22 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtu
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
codetest:
flake8 .
test: test:
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
nosetests --verbose test nosetests --verbose test
$(MAKE) codetest
ot: offlinetest
offlinetest: codetest
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
tar: youtube-dl.tar.gz tar: youtube-dl.tar.gz
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion .PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
@@ -59,6 +68,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py
CONTRIBUTING.md: README.md CONTRIBUTING.md: README.md
python devscripts/make_contributing.py README.md CONTRIBUTING.md python devscripts/make_contributing.py README.md CONTRIBUTING.md
supportedsites:
python devscripts/make_supportedsites.py docs/supportedsites.md
README.txt: README.md README.txt: README.md
pandoc -f markdown -t plain README.md -o README.txt pandoc -f markdown -t plain README.md -o README.txt

View File

@@ -1,7 +1,15 @@
youtube-dl - download videos from youtube.com or other video platforms youtube-dl - download videos from youtube.com or other video platforms
# SYNOPSIS - [INSTALLATION](#installation)
**youtube-dl** [OPTIONS] URL [URL...] - [DESCRIPTION](#description)
- [OPTIONS](#options)
- [CONFIGURATION](#configuration)
- [OUTPUT TEMPLATE](#output-template)
- [VIDEO SELECTION](#video-selection)
- [FAQ](#faq)
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
- [BUGS](#bugs)
- [COPYRIGHT](#copyright)
# INSTALLATION # INSTALLATION
@@ -34,6 +42,8 @@ YouTube.com and a few more sites. It requires the Python interpreter, version
your Unix box, on Windows or on Mac OS X. It is released to the public domain, your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like. which means you can modify it, redistribute it or use it however you like.
youtube-dl [OPTIONS] URL [URL...]
# OPTIONS # OPTIONS
-h, --help print this help text and exit -h, --help print this help text and exit
--version print program version and exit --version print program version and exit
@@ -439,6 +449,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
# DEVELOPER INSTRUCTIONS # DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
@@ -529,14 +547,52 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
From a Python program, you can embed youtube-dl in a more powerful fashion, like this: From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
import youtube_dl ```python
import youtube_dl
ydl_opts = {} ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
```python
import youtube_dl
class MyLogger(object):
def debug(self, msg):
pass
def warning(self, msg):
pass
def error(self, msg):
print(msg)
def my_hook(d):
if d['status'] == 'finished':
print('Done downloading, now converting ...')
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192',
}],
'logger': MyLogger(),
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
# BUGS # BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode. Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.

View File

@@ -1,20 +1,20 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals from __future__ import unicode_literals
import argparse
import io import io
import optparse
import re import re
def main(): def main():
parser = argparse.ArgumentParser() parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
parser.add_argument( options, args = parser.parse_args()
'INFILE', help='README.md file name to read from') if len(args) != 2:
parser.add_argument( parser.error('Expected an input and an output filename')
'OUTFILE', help='CONTRIBUTING.md file name to write to')
args = parser.parse_args()
with io.open(args.INFILE, encoding='utf-8') as inf: infile, outfile = args
with io.open(infile, encoding='utf-8') as inf:
readme = inf.read() readme = inf.read()
bug_text = re.search( bug_text = re.search(
@@ -25,7 +25,7 @@ def main():
out = bug_text + dev_text out = bug_text + dev_text
with io.open(args.OUTFILE, 'w', encoding='utf-8') as outf: with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out) outf.write(out)
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python
from __future__ import unicode_literals
import io
import optparse
import os
import sys
# Import youtube_dl from this checkout. ROOT_DIR is the parent of the
# directory containing this script; it is inserted at the front of sys.path
# so that a separately installed youtube_dl cannot shadow the local package
# (appending would let any earlier sys.path entry win the import).
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, ROOT_DIR)
import youtube_dl
def main():
    """Generate the Markdown list of supported sites.

    Expects exactly one command-line argument, the output filename.  Any
    other argument count is reported through ``parser.error``.  The output
    is a ``# Supported sites`` heading followed by one bullet per extractor
    returned by ``youtube_dl.gen_extractors()``, sorted case-insensitively
    by ``IE_NAME``, and is written to the output file as UTF-8.
    """
    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('Expected an output filename')

    outfile, = args

    def gen_ies_md(ies):
        """Yield one Markdown snippet per extractor in ``ies``.

        Extractors whose ``IE_DESC`` is ``False`` are skipped; a non-None
        ``IE_DESC`` is appended after the name, and extractors for which
        ``working()`` is false are marked as currently broken.
        """
        for ie in ies:
            # Explicit field indices: auto-numbered '{}' fields are only
            # accepted from Python 2.7 / 3.1 on and raise ValueError on 2.6.
            ie_md = '**{0}**'.format(ie.IE_NAME)
            ie_desc = getattr(ie, 'IE_DESC', None)
            if ie_desc is False:
                continue
            if ie_desc is not None:
                ie_md += ': {0}'.format(ie_desc)
            if not ie.working():
                ie_md += ' (Currently broken)'
            yield ie_md

    ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
    out = '# Supported sites\n' + ''.join(
        ' - ' + md + '\n'
        for md in gen_ies_md(ies))

    with io.open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(out)
if __name__ == '__main__':
main()

View File

@@ -11,8 +11,19 @@ README_FILE = os.path.join(ROOT_DIR, 'README.md')
with io.open(README_FILE, encoding='utf-8') as f: with io.open(README_FILE, encoding='utf-8') as f:
readme = f.read() readme = f.read()
PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n' PREFIX = '''%YOUTUBE-DL(1)
readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
# NAME
youtube\-dl \- download videos from youtube.com or other video platforms
# SYNOPSIS
**youtube-dl** \[OPTIONS\] URL [URL...]
'''
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme readme = PREFIX + readme
if sys.version_info < (3, 0): if sys.version_info < (3, 0):

500
docs/supportedsites.md Normal file
View File

@@ -0,0 +1,500 @@
# Supported sites
- **1up.com**
- **220.ro**
- **24video**
- **3sat**
- **4tube**
- **56.com**
- **5min**
- **8tracks**
- **9gag**
- **abc.net.au**
- **AcademicEarth:Course**
- **AddAnime**
- **AdobeTV**
- **AdultSwim**
- **Aftonbladet**
- **AlJazeera**
- **Allocine**
- **anitube.se**
- **AnySex**
- **Aparat**
- **AppleTrailers**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:concert**
- **arte.tv:creative**
- **arte.tv:ddc**
- **arte.tv:embed**
- **arte.tv:future**
- **audiomack**
- **AUEngine**
- **Azubu**
- **bambuser**
- **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
- **bbc.co.uk**: BBC iPlayer
- **Beeg**
- **BehindKink**
- **Bet**
- **Bild**: Bild.de
- **BiliBili**
- **blinkx**
- **blip.tv:user**
- **BlipTV**
- **Bloomberg**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **Break**
- **Brightcove**
- **BuzzFeed**
- **BYUtv**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
- **Cinchcast**
- **Cinemassacre**
- **clipfish**
- **cliphunter**
- **Clipsyndicate**
- **Cloudy**
- **Clubic**
- **cmt.com**
- **CNET**
- **CNN**
- **CNNBlogs**
- **CollegeHumor**
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
- **Cracked**
- **Criterion**
- **Crunchyroll**
- **crunchyroll:playlist**
- **CSpan**: C-SPAN
- **culturebox.francetvinfo.fr**
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
- **daum.net**
- **DBTV**
- **DeezerPlaylist**
- **defense.gouv.fr**
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
- **Dropbox**
- **DrTuber**
- **DRTV**
- **Dump**
- **dvtv**: http://video.aktualne.cz/
- **EbaumsWorld**
- **eHow**
- **Einthusan**
- **eitb.tv**
- **EllenTV**
- **EllenTV:clips**
- **ElPais**: El País
- **EMPFlix**
- **Engadget**
- **Eporner**
- **Escapist**
- **EveryonesMixtape**
- **exfm**: ex.fm
- **ExpoTV**
- **ExtremeTube**
- **facebook**
- **faz.net**
- **fc2**
- **fernsehkritik.tv**
- **fernsehkritik.tv:postecke**
- **Firedrive**
- **Firstpost**
- **firsttv**: Видеоархив - Первый канал
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **Foxgay**
- **FoxNews**
- **france2.fr:generation-quoi**
- **FranceCulture**
- **FranceInter**
- **francetv**: France 2, 3, 4, 5 and Ô
- **francetvinfo.fr**
- **Freesound**
- **freespeech.org**
- **FreeVideo**
- **FunnyOrDie**
- **Gamekings**
- **GameOne**
- **gameone:playlist**
- **GameSpot**
- **GameStar**
- **Gametrailers**
- **GDCVault**
- **generic**: Generic downloader that works on some sites
- **GiantBomb**
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GodTube**
- **GoldenMoustache**
- **Golem**
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
- **Goshgay**
- **Grooveshark**
- **Groupon**
- **Hark**
- **Heise**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HornBunny**
- **HostingBulk**
- **HotNewHipHop**
- **Howcast**
- **HowStuffWorks**
- **HuffPost**: Huffington Post
- **Hypem**
- **Iconosquare**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Ina**
- **InfoQ**
- **Instagram**
- **instagram:user**: Instagram user profile
- **InternetVideoArchive**
- **IPrima**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **Izlesene**
- **JadoreCettePub**
- **JeuxVideo**
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **Kankan**
- **keek**
- **KeezMovies**
- **KhanAcademy**
- **KickStarter**
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
- **la7.tv**
- **Laola1Tv**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **livestream:original**
- **lrt.lt**
- **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **Malemotion**
- **MDR**
- **metacafe**
- **Metacritic**
- **Mgoon**
- **Minhateca**
- **MinistryGrid**
- **mitele.es**
- **mixcloud**
- **MLB**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
- **Mojvideo**
- **Moniker**: allmyvideos.net and vidspot.net
- **mooshare**: Mooshare.biz
- **Morningstar**: morningstar.com
- **Motherless**
- **Motorsport**: motorsport.com
- **MovieClips**
- **Moviezine**
- **movshare**: MovShare
- **MPORA**
- **MTV**
- **mtviggy.com**
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- **MusicPlayOn**
- **MusicVault**
- **muzu.tv**
- **MySpace**
- **MySpace:album**
- **MySpass**
- **myvideo**
- **MyVidster**
- **Naver**
- **NBA**
- **NBC**
- **NBCNews**
- **ndr**: NDR.de - Mediathek
- **NDTV**
- **NerdCubedFeed**
- **Newgrounds**
- **Newstube**
- **nfb**: National Film Board of Canada
- **nfl.com**
- **nhl.com**
- **nhl.com:videocenter**: NHL videocenter category
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **Noco**
- **Normalboots**
- **NosVideo**
- **novamov**: NovaMov
- **Nowness**
- **nowvideo**: NowVideo
- **npo.nl**
- **NRK**
- **NRKTV**
- **NTV**
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **ORFFM4**: radio FM4
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **PBS**
- **Phoenix**
- **Photobucket**
- **PlanetaPlay**
- **play.fm**
- **played.to**
- **Playvid**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **PornHd**
- **PornHub**
- **Pornotube**
- **PornoXO**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Pyvideo**
- **QuickVid**
- **radio.de**
- **radiofrance**
- **Rai**
- **RBMARadio**
- **RedTube**
- **Restudy**
- **ReverbNation**
- **RingTV**
- **RottenTomatoes**
- **Roxwel**
- **RTBF**
- **RTLnow**
- **rtlxl.nl**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
- **rtve.es:live**: RTVE.es live streams
- **RUHD**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **SciVee**
- **screen.yahoo:search**: Yahoo screen search
- **Screencast**
- **ScreencastOMatic**
- **ScreenwaveMedia**
- **ServingSys**
- **Sexu**
- **SexyKarma**: Sexy Karma and Watch Indian Porn
- **Shared**
- **ShareSix**
- **Sina**
- **Slideshare**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
- **smotri:community**: Smotri.com community videos
- **smotri:user**: Smotri.com user videos
- **Snotr**
- **Sockshare**
- **Sohu**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:set**
- **soundcloud:user**
- **Soundgasm**
- **southpark.cc.com**
- **southpark.de**
- **Space**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **Spike**
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **SRMediathek**: Süddeutscher Rundfunk
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **streamcloud.eu**
- **StreamCZ**
- **SunPorno**
- **SWRMediathek**
- **Syfy**
- **SztvHu**
- **Tagesschau**
- **Tapely**
- **Tass**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- **TeamFour**
- **TechTalks**
- **techtv.mit.edu**
- **TED**
- **tegenlicht.vpro.nl**
- **TeleBruxelles**
- **telecinco.es**
- **TeleMB**
- **TenPlay**
- **TF1**
- **TheOnion**
- **ThePlatform**
- **TheSixtyOne**
- **ThisAV**
- **THVideo**
- **THVideoPlaylist**
- **tinypic**: tinypic.com videos
- **tlc.com**
- **tlc.de**
- **TMZ**
- **TNAFlix**
- **tou.tv**
- **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
- **TruTube**
- **Tube8**
- **Tudou**
- **Tumblr**
- **TuneIn**
- **Turbo**
- **Tutv**
- **tv.dfb.de**
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
- **TVPlay**: TV3Play and related services
- **Twitch**
- **Ubu**
- **udemy**
- **udemy:course**
- **Unistra**
- **Urort**: NRK P3 Urørt
- **ustream**
- **ustream:channel**
- **Vbox7**
- **VeeHD**
- **Veoh**
- **Vesti**: Вести.Ru
- **Vevo**
- **VGTV**
- **vh1.com**
- **Vice**
- **Viddler**
- **video.google:search**: Google Video search
- **video.mit.edu**
- **VideoBam**
- **VideoDetective**
- **videofy.me**
- **videolectures.net**
- **VideoMega**
- **VideoPremium**
- **VideoTt**: video.tt - Your True Tube
- **videoweed**: VideoWeed
- **Vidme**
- **Vidzi**
- **viki**
- **vimeo**
- **vimeo:album**
- **vimeo:channel**
- **vimeo:group**
- **vimeo:likes**: Vimeo user likes
- **vimeo:review**: Review pages on vimeo
- **vimeo:user**
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- **Vimple**: Vimple.ru
- **Vine**
- **vine:user**
- **vk.com**
- **vk.com:user-videos**: vk.com:All of a user's videos
- **Vodlocker**
- **Vporn**
- **VRT**
- **vube**: Vube.com
- **VuClip**
- **vulture.com**
- **Walla**
- **WashingtonPost**
- **wat.tv**
- **WayOfTheMaster**
- **WDR**
- **wdr:mobile**
- **WDRMaus**: Sendung mit der Maus
- **Weibo**
- **Wimp**
- **Wistia**
- **WorldStarHipHop**
- **wrzuta.pl**
- **XBef**
- **XboxClips**
- **XHamster**
- **XMinus**
- **XNXX**
- **XTube**
- **XTubeUser**: XTube user profile
- **XVideos**
- **Yahoo**: Yahoo screen and movies
- **YesJapan**
- **Ynet**
- **YouJizz**
- **Youku**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:playlist**: YouTube.com playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **ZDF**
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums
- **zingmp3:song**: mp3.zing.vn songs

View File

@@ -1,2 +1,6 @@
[wheel] [wheel]
universal = True universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
ignore = E501

View File

@@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False):
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
def expect_info_dict(self, expected_dict, got_dict): def expect_info_dict(self, got_dict, expected_dict):
for info_field, expected in expected_dict.items(): for info_field, expected in expected_dict.items():
if isinstance(expected, compat_str) and expected.startswith('re:'): if isinstance(expected, compat_str) and expected.startswith('re:'):
got = got_dict.get(info_field) got = got_dict.get(info_field)

View File

@@ -155,7 +155,7 @@ def generator(test_case):
if is_playlist: if is_playlist:
self.assertEqual(res_dict['_type'], 'playlist') self.assertEqual(res_dict['_type'], 'playlist')
self.assertTrue('entries' in res_dict) self.assertTrue('entries' in res_dict)
expect_info_dict(self, test_case.get('info_dict', {}), res_dict) expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
if 'playlist_mincount' in test_case: if 'playlist_mincount' in test_case:
assertGreaterEqual( assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
with io.open(info_json_fn, encoding='utf-8') as infof: with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof) info_dict = json.load(infof)
expect_info_dict(self, tc.get('info_dict', {}), info_dict) expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally: finally:
try_rm_tcs_files() try_rm_tcs_files()
if is_playlist and res_dict is not None and res_dict.get('entries'): if is_playlist and res_dict is not None and res_dict.get('entries'):

View File

@@ -88,6 +88,14 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None) self.assertTrue(subtitles['it'] is not None)
def test_youtube_translated_subtitles(self):
    # The video below has a subtitles track which can be translated, so
    # requesting automatic subtitles in Italian must produce an 'it' entry.
    self.url = 'Ky9eprVWzlI'
    dl_params = self.DL.params
    dl_params['writeautomaticsub'] = True
    dl_params['subtitleslangs'] = ['it']
    translated = self.getSubtitles()
    self.assertTrue(translated['it'] is not None)
def test_youtube_nosubtitles(self): def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles') self.DL.expect_warning('video doesn\'t have subtitles')
self.url = 'n5BB19UTcdA' self.url = 'n5BB19UTcdA'

View File

@@ -7,9 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io import io
import os
import re import re
import unittest
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

View File

@@ -16,39 +16,40 @@ import json
import xml.etree.ElementTree import xml.etree.ElementTree
from youtube_dl.utils import ( from youtube_dl.utils import (
args_to_str,
clean_html, clean_html,
DateRange, DateRange,
detect_exe_version,
encodeFilename, encodeFilename,
escape_rfc3986,
escape_url,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
orderedSet,
OnDemandPagedList,
InAdvancePagedList, InAdvancePagedList,
intlist_to_bytes,
js_to_json,
limit_length,
OnDemandPagedList,
orderedSet,
parse_duration, parse_duration,
parse_filesize,
parse_iso8601,
read_batch_urls, read_batch_urls,
sanitize_filename, sanitize_filename,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_to_int, str_to_int,
strip_jsonp,
struct_unpack, struct_unpack,
timeconvert, timeconvert,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
uppercase_escape,
url_basename, url_basename,
urlencode_postdata, urlencode_postdata,
xpath_with_ns,
parse_iso8601,
strip_jsonp,
uppercase_escape,
limit_length,
escape_rfc3986,
escape_url,
js_to_json,
intlist_to_bytes,
args_to_str,
parse_filesize,
version_tuple, version_tuple,
xpath_with_ns,
) )
@@ -390,5 +391,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
def test_detect_exe_version(self):
    # Pairs of (captured program output, version string expected back).
    # The last sample has unrelated lines ahead of the version banner.
    samples = (
        ('''ffmpeg version 1.2.1
built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
configuration: --prefix=/usr --extra-''', '1.2.1'),
        ('''ffmpeg version N-63176-g1fb4685
built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)''', 'N-63176-g1fb4685'),
        ('''X server found. dri2 connection failed!
Trying to open render node...
Success at /dev/dri/renderD128.
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...''', '2.4.4'),
    )
    for exe_output, expected_version in samples:
        self.assertEqual(detect_exe_version(exe_output), expected_version)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -1,76 +0,0 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params
import io
import json
import youtube_dl.YoutubeDL
import youtube_dl.extractor
class YoutubeDL(youtube_dl.YoutubeDL):
    """youtube_dl.YoutubeDL subclass whose to_stderr is rebound to to_screen."""

    def __init__(self, *args, **kwargs):
        super(YoutubeDL, self).__init__(*args, **kwargs)
        # Rebind after the base initializer has run, so later calls to
        # to_stderr on this instance go through to_screen instead.
        self.to_stderr = self.to_screen
# Options handed to YoutubeDL in the test below, passed through
# test.helper.get_params first (presumably merged with shared test
# defaults -- confirm in test/helper.py).
params = get_params({
    'writeinfojson': True,
    'skip_download': True,
    'writedescription': True,
})
# ID of the video the test downloads, and the two side files the test
# expects to find afterwards (both are removed again in tearDown).
TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
# Description text compared against both the info JSON and the
# .description file.
EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl.
For more information, contact phihag@phihag.de .'''
class TestInfoJSON(unittest.TestCase):
    """Checks the .info.json and .description files written for TEST_ID."""

    def setUp(self):
        # Start from a clean slate: reuse the cleanup done after each test.
        self.tearDown()

    def test_info_json(self):
        downloader = YoutubeDL(params)
        downloader.add_info_extractor(youtube_dl.extractor.YoutubeIE())
        downloader.download([TEST_ID])

        # The info JSON must exist and carry the expected metadata fields.
        self.assertTrue(os.path.exists(INFO_JSON_FILE))
        with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as json_file:
            info = json.load(json_file)
        expected_fields = (
            ('upload_date', '20121002'),
            ('description', EXPECTED_DESCRIPTION),
            ('id', TEST_ID),
            ('extractor', 'youtube'),
            ('title', '''youtube-dl test video "'/\ä↭𝕐'''),
            ('uploader', 'Philipp Hagemeister'),
        )
        for field, expected in expected_fields:
            self.assertEqual(info[field], expected)

        # The description file must exist and match the same text.
        self.assertTrue(os.path.exists(DESCRIPTION_FILE))
        with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as desc_file:
            written_description = desc_file.read()
        self.assertEqual(written_description, EXPECTED_DESCRIPTION)

    def tearDown(self):
        # Remove whichever of the two output files is present.
        for leftover in (INFO_JSON_FILE, DESCRIPTION_FILE):
            if os.path.exists(leftover):
                os.remove(leftover)
if __name__ == '__main__':
unittest.main()

View File

@@ -27,6 +27,7 @@ from .compat import (
compat_cookiejar, compat_cookiejar,
compat_expanduser, compat_expanduser,
compat_http_client, compat_http_client,
compat_kwargs,
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_urllib_request, compat_urllib_request,
@@ -67,7 +68,11 @@ from .cache import Cache
from .extractor import get_info_extractor, gen_extractors from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor from .postprocessor import (
FFmpegMergerPP,
FFmpegPostProcessor,
get_postprocessor,
)
from .version import __version__ from .version import __version__
@@ -116,7 +121,7 @@ class YoutubeDL(object):
dump_single_json: Force printing the info_dict of the whole playlist dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line. (or video) as a single JSON line.
simulate: Do not download the video files. simulate: Do not download the video files.
format: Video format code. format: Video format code. See options.py for more information.
format_limit: Highest quality format to try. format_limit: Highest quality format to try.
outtmpl: Template for output names. outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names restrictfilenames: Do not allow "&" and spaces in file names
@@ -176,6 +181,28 @@ class YoutubeDL(object):
extract_flat: Do not resolve URLs, return the immediate result. extract_flat: Do not resolve URLs, return the immediate result.
Pass in 'in_playlist' to only show this behavior for Pass in 'in_playlist' to only show this behavior for
playlist items. playlist items.
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
youtube_dl/postprocessor/__init__.py for a list.
as well as any further keyword arguments for the
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
The dict may also have some of the following entries:
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:
@@ -256,6 +283,16 @@ class YoutubeDL(object):
self.print_debug_header() self.print_debug_header()
self.add_default_info_extractors() self.add_default_info_extractors()
for pp_def_raw in self.params.get('postprocessors', []):
pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
del pp_def['key']
pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp)
for ph in self.params.get('progress_hooks', []):
self.add_progress_hook(ph)
def warn_if_short_id(self, argv): def warn_if_short_id(self, argv):
# short YouTube ID starting with dash? # short YouTube ID starting with dash?
idxs = [ idxs = [
@@ -675,7 +712,7 @@ class YoutubeDL(object):
entries = entries[::-1] entries = entries[::-1]
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
extra = { extra = {
'n_entries': n_entries, 'n_entries': n_entries,
'playlist': playlist, 'playlist': playlist,
@@ -979,13 +1016,13 @@ class YoutubeDL(object):
descfn = filename + '.description' descfn = filename + '.description'
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present') self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
self.report_warning('There\'s no description to write.')
else: else:
try: try:
self.to_screen('[info] Writing video description to: ' + descfn) self.to_screen('[info] Writing video description to: ' + descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description']) descfile.write(info_dict['description'])
except (KeyError, TypeError):
self.report_warning('There\'s no description to write.')
except (OSError, IOError): except (OSError, IOError):
self.report_error('Cannot write description file ' + descfn) self.report_error('Cannot write description file ' + descfn)
return return
@@ -1112,8 +1149,7 @@ class YoutubeDL(object):
except (PostProcessingError) as err: except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err)) self.report_error('postprocessing: %s' % str(err))
return return
self.record_download_archive(info_dict)
self.record_download_archive(info_dict)
def download(self, url_list): def download(self, url_list):
"""Download a given list of URLs.""" """Download a given list of URLs."""
@@ -1297,7 +1333,9 @@ class YoutubeDL(object):
formats = info_dict.get('formats', [info_dict]) formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'), idlen = max(len('format code'),
max(len(f['format_id']) for f in formats)) max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats] formats_s = [
line(f, idlen) for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1: if len(formats) > 1:
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

View File

@@ -40,16 +40,6 @@ from .downloader import (
) )
from .extractor import gen_extractors from .extractor import gen_extractors
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
from .postprocessor import (
AtomicParsleyPP,
FFmpegAudioFixPP,
FFmpegMetadataPP,
FFmpegVideoConvertor,
FFmpegExtractAudioPP,
FFmpegEmbedSubtitlePP,
XAttrMetadataPP,
ExecAfterDownloadPP,
)
def _real_main(argv=None): def _real_main(argv=None):
@@ -122,7 +112,7 @@ def _real_main(argv=None):
if desc is False: if desc is False:
continue continue
if hasattr(ie, 'SEARCH_KEY'): if hasattr(ie, 'SEARCH_KEY'):
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny') _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
_COUNTS = ('', '5', '10', 'all') _COUNTS = ('', '5', '10', 'all')
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
compat_print(desc) compat_print(desc)
@@ -212,6 +202,43 @@ def _real_main(argv=None):
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
# PostProcessors
postprocessors = []
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio:
postprocessors.append({
'key': 'FFmpegExtractAudio',
'preferredcodec': opts.audioformat,
'preferredquality': opts.audioquality,
'nopostoverwrites': opts.nopostoverwrites,
})
if opts.recodevideo:
postprocessors.append({
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
if opts.embedthumbnail:
if not opts.addmetadata:
postprocessors.append({'key': 'FFmpegAudioFix'})
postprocessors.append({'key': 'AtomicParsley'})
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
postprocessors.append({
'key': 'ExecAfterDownload',
'verboseOutput': opts.verbose,
'exec_cmd': opts.exec_cmd,
})
ydl_opts = { ydl_opts = {
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
@@ -297,32 +324,10 @@ def _real_main(argv=None):
'encoding': opts.encoding, 'encoding': opts.encoding,
'exec_cmd': opts.exec_cmd, 'exec_cmd': opts.exec_cmd,
'extract_flat': opts.extract_flat, 'extract_flat': opts.extract_flat,
'postprocessors': postprocessors,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP())
if opts.embedthumbnail:
if not opts.addmetadata:
ydl.add_post_processor(FFmpegAudioFixPP())
ydl.add_post_processor(AtomicParsleyPP())
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
ydl.add_post_processor(ExecAfterDownloadPP(
verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
# Update version # Update version
if opts.update_self: if opts.update_self:
update_self(ydl.to_screen, opts.verbose) update_self(ydl.to_screen, opts.verbose)

View File

@@ -285,7 +285,7 @@ class FileDownloader(object):
Return True on success and False otherwise Return True on success and False otherwise
""" """
# Check file already present # Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): if filename != '-' and self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename) self.report_file_already_downloaded(filename)
self._hook_progress({ self._hook_progress({
'filename': filename, 'filename': filename,
@@ -305,19 +305,6 @@ class FileDownloader(object):
ph(status) ph(status)
def add_progress_hook(self, ph): def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries # See YoutubeDL.py (search for progress_hooks) for a description of
* filename: The final filename # this interface
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph) self._progress_hooks.append(ph)

View File

@@ -203,7 +203,7 @@ def write_flv_header(stream, metadata):
stream.write(b'\x00\x00\x00\x00\x00\x00\x00') stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
stream.write(metadata) stream.write(metadata)
# Magic numbers extracted from the output files produced by AdobeHDS.php # Magic numbers extracted from the output files produced by AdobeHDS.php
#(https://github.com/K-S-V/Scripts) # (https://github.com/K-S-V/Scripts)
stream.write(b'\x00\x00\x01\x73') stream.write(b'\x00\x00\x01\x73')

View File

@@ -6,6 +6,7 @@ import subprocess
from .common import FileDownloader from .common import FileDownloader
from ..compat import compat_subprocess_get_DEVNULL from ..compat import compat_subprocess_get_DEVNULL
from ..utils import ( from ..utils import (
check_executable,
encodeFilename, encodeFilename,
) )
@@ -20,11 +21,7 @@ class MplayerFD(FileDownloader):
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
'-dumpstream', '-dumpfile', tmpfilename, url] '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first # Check for mplayer first
try: if not check_executable('mplayer', ['-h']):
subprocess.call(
['mplayer', '-h'],
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
return False return False

View File

@@ -185,7 +185,7 @@ class RtmpFD(FileDownloader):
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == RD_FAILED: if prevsize == cursize and retval == RD_FAILED:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = RD_SUCCESS retval = RD_SUCCESS

View File

@@ -3,8 +3,11 @@ from __future__ import unicode_literals
from .abc import ABCIE from .abc import ABCIE
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .anysex import AnySexIE from .anysex import AnySexIE
from .aol import AolIE from .aol import AolIE
@@ -22,6 +25,7 @@ from .arte import (
ArteTVDDCIE, ArteTVDDCIE,
ArteTVEmbedIE, ArteTVEmbedIE,
) )
from .atresplayer import AtresPlayerIE
from .audiomack import AudiomackIE from .audiomack import AudiomackIE
from .auengine import AUEngineIE from .auengine import AUEngineIE
from .azubu import AzubuIE from .azubu import AzubuIE
@@ -62,10 +66,12 @@ from .cnet import CNETIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNBlogsIE, CNNBlogsIE,
CNNArticleIE,
) )
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
@@ -87,12 +93,14 @@ from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .drtuber import DrTuberIE from .drtuber import DrTuberIE
from .drtv import DRTVIE from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE from .dump import DumpIE
from .defense import DefenseGouvFrIE from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE from .discovery import DiscoveryIE
from .divxstage import DivxStageIE from .divxstage import DivxStageIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .ehow import EHowIE from .ehow import EHowIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE from .einthusan import EinthusanIE
@@ -105,6 +113,7 @@ from .elpais import ElPaisIE
from .empflix import EMPFlixIE from .empflix import EMPFlixIE
from .engadget import EngadgetIE from .engadget import EngadgetIE
from .eporner import EpornerIE from .eporner import EpornerIE
from .eroprofile import EroProfileIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE from .exfm import ExfmIE
@@ -163,8 +172,10 @@ from .grooveshark import GroovesharkIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE from .hark import HarkIE
from .heise import HeiseIE from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE from .hentaistigma import HentaiStigmaIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE from .hostingbulk import HostingBulkIE
from .hotnewhiphop import HotNewHipHopIE from .hotnewhiphop import HotNewHipHopIE
@@ -262,6 +273,7 @@ from .nbc import (
) )
from .ndr import NDRIE from .ndr import NDRIE
from .ndtv import NDTVIE from .ndtv import NDTVIE
from .nerdcubed import NerdCubedFeedIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .newstube import NewstubeIE from .newstube import NewstubeIE
from .nfb import NFBIE from .nfb import NFBIE
@@ -288,6 +300,7 @@ from .nytimes import NYTimesIE
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .oktoberfesttv import OktoberfestTVIE from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
ORFOE1IE, ORFOE1IE,
@@ -341,6 +354,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
@@ -401,6 +415,7 @@ from .ted import TEDIE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telemb import TeleMBIE from .telemb import TeleMBIE
from .teletask import TeleTaskIE
from .tenplay import TenPlayIE from .tenplay import TenPlayIE
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
@@ -459,6 +474,7 @@ from .videott import VideoTtIE
from .videoweed import VideoWeedIE from .videoweed import VideoWeedIE
from .vidme import VidmeIE from .vidme import VidmeIE
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
from .vimeo import ( from .vimeo import (
VimeoIE, VimeoIE,
VimeoAlbumIE, VimeoAlbumIE,
@@ -506,10 +522,12 @@ from .xminus import XMinusIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeUserIE, XTubeIE from .xtube import XTubeUserIE, XTubeIE
from .xxxymovies import XXXYMoviesIE
from .yahoo import ( from .yahoo import (
YahooIE, YahooIE,
YahooSearchIE, YahooSearchIE,
) )
from .yesjapan import YesJapanIE
from .ynet import YnetIE from .ynet import YnetIE
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
@@ -528,6 +546,7 @@ from .youtube import (
YoutubeShowIE, YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeTopListIE, YoutubeTopListIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE, YoutubeUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,

View File

@@ -0,0 +1,70 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
str_to_int,
)
class AdobeTVIE(InfoExtractor):
    # Extractor for single videos on tv.adobe.com; the last URL path
    # component doubles as the video id.
    _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': 're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Scrape the watch page and return the info dict for its video."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page embeds the player configuration as a JSON object that
        # follows the "html5player:" key and ends at the end of the line.
        player_json = self._search_regex(
            r'html5player:\s*({.+?})\s*\n', webpage, 'player')
        player = self._parse_json(player_json, video_id)

        # Prefer the title from the player config; fall back to the markup.
        title = player.get('title')
        if not title:
            title = self._search_regex(
                r'data-title="([^"]+)"', webpage, 'title')

        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        date_published = self._html_search_meta(
            'datepublished', webpage, 'upload date')
        upload_date = unified_strdate(date_published)

        # Duration comes from the meta tag when present, otherwise from the
        # visible "Runtime: HH:MM:SS" text.
        raw_duration = self._html_search_meta('duration', webpage, 'duration')
        if not raw_duration:
            raw_duration = self._search_regex(
                r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')
        duration = parse_duration(raw_duration)

        raw_views = self._search_regex(
            r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
            webpage, 'view count')
        view_count = str_to_int(raw_views)

        formats = []
        for source in player['sources']:
            src = source['src']
            # When no explicit quality is given, use the part of the file
            # name between the last '-' and the following '.'.
            format_id = source.get('quality')
            if not format_id:
                format_id = src.split('-')[-1].split('.')[0] or None
            formats.append({
                'url': src,
                'format_id': format_id,
                'tbr': source.get('bitrate'),
            })
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
        return info

View File

@@ -0,0 +1,35 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
    # Extractor for aljazeera.com programme pages; it only locates the
    # embedded Brightcove video id and delegates to the Brightcove extractor.
    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'

    _TEST = {
        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
        'info_dict': {
            'id': '3792260579001',
            'ext': 'mp4',
            'title': 'The Slum - Episode 1: Deliverance',
            'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
            'uploader': 'Al Jazeera English',
        },
        'add_ie': ['Brightcove'],
    }

    def _real_extract(self, url):
        """Return a 'url' result pointing at the page's Brightcove video."""
        page_name = self._match_id(url)
        page = self._download_webpage(url, page_name)

        # The Brightcove video id is the first quoted argument of the
        # RenderPagesVideo(...) call found in the page.
        bc_video_id = self._search_regex(
            r'RenderPagesVideo\(\'(.+?)\'', page, 'brightcove id')

        # "brightcove:<query>" pseudo-URL: fixed player key plus the video id.
        brightcove_url = (
            'brightcove:'
            'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
            '&%40videoPlayer={0}'
        ).format(bc_video_id)

        result = {
            '_type': 'url',
            'url': brightcove_url,
            'ie_key': 'Brightcove',
        }
        return result

View File

@@ -12,7 +12,7 @@ from ..utils import (
class AllocineIE(InfoExtractor): class AllocineIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?' _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
@@ -44,6 +44,9 @@ class AllocineIE(InfoExtractor):
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac', 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}, {
'url': 'http://www.allocine.fr/video/video-19550147/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -0,0 +1,77 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
parse_duration,
parse_filesize,
int_or_none,
)
class AlphaPornoIE(InfoExtractor):
    # Extractor for single video pages on alphaporno.com. The URL slug is
    # used as display id; the numeric id is scraped from the page.
    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
        'info_dict': {
            'id': '258807',
            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
            'ext': 'mp4',
            'title': 'Sensual striptease porn with Samantha Alexandra',
            'thumbnail': 're:https?://.*\.jpg$',
            'timestamp': 1418694611,
            'upload_date': '20141216',
            'duration': 387,
            'filesize_approx': 54120000,
            'tbr': 1145,
            'categories': list,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Scrape the video page and return the info dict for its video."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The result must always carry an 'id'. When the page does not expose
        # a numeric video id, fall back to the URL slug instead of returning
        # None (the original used default=None with no fallback).
        video_id = self._search_regex(
            r"video_id\s*:\s*'([^']+)'", webpage, 'video id',
            default=display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
        # The meta value carries a leading dot (default '.mp4'); drop it.
        ext = self._html_search_meta(
            'encodingFormat', webpage, 'ext', default='.mp4')[1:]

        title = self._search_regex(
            [r'<meta content="([^"]+)" itemprop="description">',
             r'class="title" itemprop="name">([^<]+)<'],
            webpage, 'title')
        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        filesize_approx = parse_filesize(self._html_search_meta(
            'contentSize', webpage, 'file size'))
        bitrate = int_or_none(self._html_search_meta(
            'bitrate', webpage, 'bitrate'))

        # Split the comma-separated keywords, trimming whitespace and dropping
        # empty entries, so a missing or empty meta tag yields [] and not [''].
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = [
            keyword.strip() for keyword in keywords.split(',')
            if keyword.strip()]

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'filesize_approx': filesize_approx,
            'tbr': bitrate,
            'categories': categories,
            'age_limit': age_limit,
        }

View File

@@ -1,42 +1,48 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import unified_strdate
unified_strdate,
)
class ArchiveOrgIE(InfoExtractor): class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org' IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos' IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = { _TESTS = [{
"url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
'md5': '8af1d4cf447933ed3c7f4871162602db', 'md5': '8af1d4cf447933ed3c7f4871162602db',
'info_dict': { 'info_dict': {
"title": "1968 Demo - FJCC Conference Presentation Reel #1", 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
"description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>", 'ext': 'ogv',
"upload_date": "19681210", 'title': '1968 Demo - FJCC Conference Presentation Reel #1',
"uploader": "SRI International" 'description': 'md5:1780b464abaca9991d8968c877bb53ed',
'upload_date': '19681210',
'uploader': 'SRI International'
} }
} }, {
'url': 'https://archive.org/details/Cops1922',
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
'info_dict': {
'id': 'Cops1922',
'ext': 'ogv',
'title': 'Buster Keaton\'s "Cops" (1922)',
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
json_url = url + ('?' if '?' in url else '&') + 'output=json' json_url = url + ('?' if '?' in url else '&') + 'output=json'
json_data = self._download_webpage(json_url, video_id) data = self._download_json(json_url, video_id)
data = json.loads(json_data)
title = data['metadata']['title'][0] def get_optional(data_dict, field):
description = data['metadata']['description'][0] return data_dict['metadata'].get(field, [None])[0]
uploader = data['metadata']['creator'][0]
upload_date = unified_strdate(data['metadata']['date'][0]) title = get_optional(data, 'title')
description = get_optional(data, 'description')
uploader = get_optional(data, 'creator')
upload_date = unified_strdate(get_optional(data, 'date'))
formats = [ formats = [
{ {

View File

@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
config_xml_url, video_id, note='Downloading configuration') config_xml_url, video_id, note='Downloading configuration')
formats = [{ formats = [{
'forma_id': q.attrib['quality'], 'format_id': q.attrib['quality'],
# The playpath starts at 'mp4:', if we don't manually # The playpath starts at 'mp4:', if we don't manually
# split the url, rtmpdump will incorrectly parse them # split the url, rtmpdump will incorrectly parse them
'url': q.text.split('mp4:', 1)[0], 'url': q.text.split('mp4:', 1)[0],
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')), 'height': int_or_none(f.get('height')),
'tbr': int_or_none(f.get('bitrate')), 'tbr': int_or_none(f.get('bitrate')),
'quality': qfunc(f['quality']), 'quality': qfunc(f.get('quality')),
'source_preference': source_pref, 'source_preference': source_pref,
} }

View File

@@ -0,0 +1,114 @@
from __future__ import unicode_literals
import time
import hmac
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urllib_request,
int_or_none,
float_or_none,
xpath_text,
ExtractorError,
)
class AtresPlayerIE(InfoExtractor):
    # Extractor for atresplayer.com episode pages. Video URLs are requested
    # from the service API with an HMAC token built from the episode id and a
    # shifted timestamp.
    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
    _TESTS = [
        {
            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
            'md5': 'efd56753cda1bb64df52a3074f62e38a',
            'info_dict': {
                'id': 'capitulo-10-especial-solidario-nochebuena',
                'ext': 'mp4',
                'title': 'Especial Solidario de Nochebuena',
                'description': 'md5:e2d52ff12214fa937107d21064075bf1',
                'duration': 5527.6,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
            'only_matching': True,
        },
    ]

    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
    _TIMESTAMP_SHIFT = 30000

    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'

    def _real_extract(self, url):
        """Return the info dict (formats plus metadata) for an episode page.

        Raises ExtractorError (expected) when the video API does not answer
        with an 'ok' result.
        """
        # Local import: hashlib is needed for the explicit HMAC digest below.
        import hashlib

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        episode_id = self._search_regex(
            r'episode="([^"]+)"', webpage, 'episode id')

        # The server time is divided by 1000 (scale argument). The request is
        # non-fatal; a failed download is expected to yield a falsy value
        # rather than None, so it is normalised to None to make the local
        # clock fallback actually apply. The fallback is truncated to an int
        # so no fractional part leaks into the token and the request URL.
        server_time = self._download_webpage(
            self._TIME_API_URL,
            video_id, 'Downloading timestamp', fatal=False)
        timestamp = int_or_none(server_time or None, 1000, int(time.time()))
        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)

        # MD5 was hmac.new()'s implicit default digest; newer Python versions
        # require digestmod to be given explicitly.
        token = hmac.new(
            self._MAGIC.encode('ascii'),
            (episode_id + timestamp_shifted).encode('utf-8'),
            hashlib.md5
        ).hexdigest()

        formats = []
        for fmt in ['windows', 'android_tablet']:
            request = compat_urllib_request.Request(
                self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
            request.add_header('Youtubedl-user-agent', self._USER_AGENT)

            fmt_json = self._download_json(
                request, video_id, 'Downloading %s video JSON' % fmt)

            # A missing 'resultDes' is reported as an API error instead of
            # crashing on None.lower().
            result = fmt_json.get('resultDes')
            if result is None or result.lower() != 'ok':
                raise ExtractorError(
                    '%s returned error: %s' % (self.IE_NAME, result), expected=True)

            for video_url in fmt_json['resultObject'].values():
                if video_url.endswith('/Manifest'):
                    # Swap the trailing '/Manifest' (9 chars) for the f4m
                    # manifest path.
                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': 'android',
                        'preference': 1,
                    })
        self._sort_formats(formats)

        player = self._download_json(
            self._PLAYER_URL_TEMPLATE % episode_id,
            episode_id)

        path_data = player.get('pathData')

        episode = self._download_xml(
            self._EPISODE_URL_TEMPLATE % path_data,
            video_id, 'Downloading episode XML')

        duration = float_or_none(xpath_text(
            episode, './media/asset/info/technical/contentDuration', 'duration'))

        art = episode.find('./media/asset/info/art')
        title = xpath_text(art, './name', 'title')
        description = xpath_text(art, './description', 'description')
        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View File

@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor): class BBCCoUkIE(SubtitlesInfoExtractor):
IE_NAME = 'bbc.co.uk' IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer' IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})' _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
_TESTS = [ _TESTS = [
{ {
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'info_dict': { 'info_dict': {
'id': 'b039d07m', 'id': 'b039d07m',
'ext': 'flv', 'ext': 'flv',
'title': 'Kaleidoscope: Leonard Cohen', 'title': 'Kaleidoscope, Leonard Cohen',
'description': 'md5:db4755d7a665ae72343779f7dacb402c', 'description': 'The Canadian poet and songwriter reflects on his musical career.',
'duration': 1740, 'duration': 1740,
}, },
'params': { 'params': {
@@ -71,7 +71,54 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
}, }, {
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
'info_dict': {
'id': 'b04v209v',
'ext': 'flv',
'title': 'Pete Tong, The Essential New Tune Special',
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
'duration': 10800,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
'note': 'Audio',
'info_dict': {
'id': 'p02frcch',
'ext': 'flv',
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
'duration': 3507,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
'note': 'Video',
'info_dict': {
'id': 'p025c103',
'ext': 'flv',
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
'duration': 226,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
'only_matching': True,
}
] ]
def _extract_asx_playlist(self, connection, programme_id): def _extract_asx_playlist(self, connection, programme_id):
@@ -203,6 +250,59 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
return formats, subtitles return formats, subtitles
def _download_playlist(self, playlist_id):
    """Resolve a playlist id to a playable programme.

    Tries the JSON playlist first and falls back to the legacy XML playlist
    when the JSON endpoint answers with HTTP 404 or has no
    'defaultAvailableVersion'.

    Returns a tuple
    (programme_id, title, description, duration, formats, subtitles).
    Raises ExtractorError (expected) when the legacy playlist reports that
    the episode has no items.
    """
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')

        version = playlist.get('defaultAvailableVersion')
        if version:
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                kind = item['kind']
                if kind != 'programme' and kind != 'radioProgramme':
                    continue
                programme_id = item.get('vpid')
                duration = int(item.get('duration'))
                formats, subtitles = self._download_media_selector(programme_id)
            return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        # Only a 404 from the JSON endpoint means "use the legacy playlist";
        # anything else is a real failure and must propagate. The
        # parentheses matter: without them `not` binds only to the
        # isinstance() test, which swallowed every HTTP error and evaluated
        # `.code` on causes that are not HTTP errors.
        if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
            raise

    # fallback to legacy playlist
    playlist = self._download_xml(
        'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
        playlist_id, 'Downloading legacy playlist XML')

    # A <noItems> element means nothing is playable; its 'reason' attribute
    # is turned into a user-facing message.
    no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue
        title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
        description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
        programme_id = item.get('identifier')
        duration = int(item.get('duration'))
        formats, subtitles = self._download_media_selector(programme_id)

    return programme_id, title, description, duration, formats, subtitles
def _real_extract(self, url): def _real_extract(self, url):
group_id = self._match_id(url) group_id = self._match_id(url)
@@ -219,32 +319,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
duration = player['duration'] duration = player['duration']
formats, subtitles = self._download_media_selector(programme_id) formats, subtitles = self._download_media_selector(programme_id)
else: else:
playlist = self._download_xml( programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
group_id, 'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
elif reason == 'noMedia':
msg = 'Episode %s is not currently available' % group_id
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
programme_id = item.get('identifier')
duration = int(item.get('duration'))
formats, subtitles = self._download_media_selector(programme_id)
if self._downloader.params.get('listsubtitles', False): if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles) self._list_available_subtitles(programme_id, subtitles)

View File

@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
{ {
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': { 'info_dict': {
'id': '417cd61c-c793-4e8e-b006-e445ecc45add', 'id': '406429c6-1b8a-463e-83fc-814adb81a9db',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv', 'ext': 'flv',
'title': 'BET News Presents: A Conversation With President Obama', 'title': 'BET News Presents: A Conversation With President Obama',

View File

@@ -25,7 +25,7 @@ from ..utils import (
class BrightcoveIE(InfoExtractor): class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
_TESTS = [ _TESTS = [

View File

@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
HEADRequest,
unified_strdate, unified_strdate,
url_basename, url_basename,
qualities, qualities,
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']) preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
fmt_url = next(iter(media.find('VIDEOS'))).text
if '/geo' in fmt_url.lower():
response = self._request_webpage(
HEADRequest(fmt_url), video_id,
'Checking if the video is georestricted')
if '/blocage' in response.geturl():
raise ExtractorError(
'The video is not available in your country',
expected=True)
formats = [] formats = []
for fmt in media.find('VIDEOS'): for fmt in media.find('VIDEOS'):
format_url = fmt.text format_url = fmt.text

View File

@@ -11,6 +11,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
) )
@@ -19,41 +20,33 @@ class CeskaTelevizeIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka', 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': { 'info_dict': {
'id': '213512120230004', 'id': '214411058091220',
'ext': 'flv', 'ext': 'mp4',
'title': 'První republika: Španělská chřipka', 'title': 'Hyde Park Civilizace',
'duration': 3107.4, 'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 3350,
}, },
'params': { 'params': {
'skip_download': True, # requires rtmpdump # m3u8 download
'skip_download': True,
}, },
'skip': 'Works only from Czech Republic.',
},
{
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
'info_dict': {
'id': '20138143440',
'ext': 'flv',
'title': 'Tsatsiki, maminka a policajt',
'duration': 6754.1,
},
'params': {
'skip_download': True, # requires rtmpdump
},
'skip': 'Works only from Czech Republic.',
}, },
{ {
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
'info_dict': { 'info_dict': {
'id': '14716', 'id': '14716',
'ext': 'flv', 'ext': 'mp4',
'title': 'První republika: Zpěvačka z Dupárny Bobina', 'title': 'První republika: Zpěvačka z Dupárny Bobina',
'duration': 90, 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 88.4,
}, },
'params': { 'params': {
'skip_download': True, # requires rtmpdump # m3u8 download
'skip_download': True,
}, },
}, },
] ]
@@ -80,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani', 'requestSource': 'iVysilani',
} }
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url', req = compat_urllib_request.Request(
data=compat_urllib_parse.urlencode(data)) 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=compat_urllib_parse.urlencode(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')
req.add_header('x-addr', '127.0.0.1') req.add_header('x-addr', '127.0.0.1')
@@ -90,39 +84,31 @@ class CeskaTelevizeIE(InfoExtractor):
playlistpage = self._download_json(req, video_id) playlistpage = self._download_json(req, video_id)
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url'])) playlist_url = playlistpage['url']
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
req.add_header('Referer', url) req.add_header('Referer', url)
playlist = self._download_xml(req, video_id) playlist = self._download_json(req, video_id)
item = playlist['playlist'][0]
formats = [] formats = []
for i in playlist.find('smilRoot/body'): for format_id, stream_url in item['streamUrls'].items():
if 'AD' not in i.attrib['id']: formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
base_url = i.attrib['base']
parsedurl = compat_urllib_parse_urlparse(base_url)
duration = i.attrib['duration']
for video in i.findall('video'):
if video.attrib['label'] != 'AD':
format_id = video.attrib['label']
play_path = video.attrib['src']
vbr = int(video.attrib['system-bitrate'])
formats.append({
'format_id': format_id,
'url': base_url,
'vbr': vbr,
'play_path': play_path,
'app': parsedurl.path[1:] + '?' + parsedurl.query,
'rtmp_live': True,
'ext': 'flv',
})
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
duration = float_or_none(item.get('duration'))
thumbnail = item.get('previewImageUrl')
return { return {
'id': episode_id, 'id': episode_id,
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'), 'title': title,
'duration': float(duration), 'description': description,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats, 'formats': formats,
} }

View File

@@ -11,14 +11,14 @@ from ..utils import (
class CNNIE(InfoExtractor): class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/ _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))''' (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
'md5': '3e6121ea48df7e2259fe73a0628605c4', 'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'sports_2013_06_09_nadal-1-on-1.cnn', 'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nadal wins 8th French Open title', 'title': 'Nadal wins 8th French Open title',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
@@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
"upload_date": "20130821", "upload_date": "20130821",
} }
}, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards',
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -127,3 +137,28 @@ class CNNBlogsIE(InfoExtractor):
'url': cnn_url, 'url': cnn_url,
'ie_key': CNNIE.ie_key(), 'ie_key': CNNIE.ie_key(),
} }
class CNNArticleIE(InfoExtractor):
    """Resolve a CNN article page to the CNN video referenced on it.

    The article page itself is not a video page (URLs under ``/video/`` are
    excluded by ``_VALID_URL``); the embedded video path is read from the
    page and the result is delegated to ``CNNIE``.
    """
    _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
    _TEST = {
        'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
        'md5': '275b326f85d80dff7592a9820f5dc887',
        'info_dict': {
            'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
            'ext': 'mp4',
            'title': 'Obama: We\'re not going to be intimidated',
            'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
            'upload_date': '20141220',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Download the article and return a URL result pointing at CNNIE."""
        # The last path component of the article URL serves as the id used
        # for the page download.
        page_id = url_basename(url)
        webpage = self._download_webpage(url, page_id)

        # The page carries the video path in a "video: '<path>'" assignment.
        video_path = self._html_search_regex(
            r"video:\s*'([^']+)'", webpage, 'cnn url')
        target_url = 'http://cnn.com/video/?/video/' + video_path

        return {
            '_type': 'url',
            'url': target_url,
            'ie_key': CNNIE.ie_key(),
        }

View File

@@ -50,7 +50,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
)| )|
(?P<interview> (?P<interview>
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
(?:[?#].*|$)''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@@ -83,6 +83,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
}, { }, {
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights', 'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
'only_matching': True,
}, { }, {
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food', 'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
'only_matching': True, 'only_matching': True,

View File

@@ -40,7 +40,7 @@ class InfoExtractor(object):
information about the video (or videos) the URL refers to. This information about the video (or videos) the URL refers to. This
information includes the real video URL, the video title, author and information includes the real video URL, the video title, author and
others. The information is stored in a dictionary which is then others. The information is stored in a dictionary which is then
passed to the FileDownloader. The FileDownloader processes this passed to the YoutubeDL. The YoutubeDL processes this
information possibly downloading the video to the file system, among information possibly downloading the video to the file system, among
other possible outcomes. other possible outcomes.
@@ -92,6 +92,8 @@ class InfoExtractor(object):
by this field, regardless of all other values. by this field, regardless of all other values.
-1 for default (order by other properties), -1 for default (order by other properties),
-2 or smaller for less than default. -2 or smaller for less than default.
< -1000 to hide the format (if there is
another one which is strictly better)
* language_preference Is this in the correct requested * language_preference Is this in the correct requested
language? language?
10 if it's what the URL is about, 10 if it's what the URL is about,
@@ -589,7 +591,7 @@ class InfoExtractor(object):
if display_name is None: if display_name is None:
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?isx)<meta
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs) html, display_name, fatal=fatal, group='content', **kwargs)

View File

@@ -0,0 +1,29 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class CommonMistakesIE(InfoExtractor):
    """Catch the literal placeholder ``url``/``URL`` given as a download target.

    This extractor never extracts anything: it always raises an expected
    ``ExtractorError`` explaining that the placeholder should be removed from
    the command line or configuration.
    """
    IE_DESC = False  # Do not list
    # Anchored with ``$`` so that only the bare word matches; without the
    # anchor every input that merely starts with "url" would be caught here.
    _VALID_URL = r'''(?x)
        (?:url|URL)$
    '''

    _TESTS = [{
        'url': 'url',
        'only_matching': True,
    }, {
        'url': 'URL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the mistake."""
        msg = (
            'You\'ve asked youtube-dl to download the URL "%s". '
            'That doesn\'t make any sense. '
            'Simply remove the parameter in your command or configuration.'
        ) % url
        # Suggest -v only when the user is NOT already running verbosely;
        # telling a verbose run to "add -v" is useless.
        if not self._downloader.params.get('verbose'):
            msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
        raise ExtractorError(msg, expected=True)

View File

@@ -29,10 +29,9 @@ from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor): class CrunchyrollIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TEST = { _TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
'info_dict': { 'info_dict': {
'id': '645513', 'id': '645513',
'ext': 'flv', 'ext': 'flv',
@@ -47,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
'only_matching': True,
}]
_FORMAT_IDS = { _FORMAT_IDS = {
'360': ('60', '106'), '360': ('60', '106'),
@@ -226,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = [] formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt] stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p' video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')

View File

@@ -27,7 +27,6 @@ class CSpanIE(InfoExtractor):
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
# For whatever reason, the served video alternates between # For whatever reason, the served video alternates between
# two different ones # two different ones
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
'info_dict': { 'info_dict': {
'id': '340723', 'id': '340723',
'ext': 'mp4', 'ext': 'mp4',

View File

@@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id) webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex( full_id = self._search_regex(
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
webpage, 'full id') webpage, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id}) query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml( info = self._download_xml(

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
@@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video['id'], 'id': compat_str(video['id']),
'display_id': display_id, 'display_id': display_id,
'title': video['title'], 'title': video['title'],
'description': clean_html(video['desc']), 'description': clean_html(video['desc']),

View File

@@ -0,0 +1,125 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
unescapeHTML,
ExtractorError,
)
class DVTVIE(InfoExtractor):
    """Extractor for single videos and playlists on video.aktualne.cz (DVTV)."""
    IE_NAME = 'dvtv'
    IE_DESC = 'http://video.aktualne.cz/'

    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'

    _TESTS = [{
        'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
        'md5': '67cb83e4a955d36e1b5d31993134a0c2',
        'info_dict': {
            'id': 'dc0768de855511e49e4b0025900fea04',
            'ext': 'mp4',
            'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
        }
    }, {
        'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
        'md5': '6388f1941b48537dbd28791f712af8bf',
        'info_dict': {
            'id': '72c02230849211e49f60002590604f2e',
            'ext': 'mp4',
            'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
        }
    }, {
        'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
        'info_dict': {
            'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
            'id': '973eb3bc854e11e498be002590604f2e',
        },
        'playlist': [{
            'md5': 'da7ca6be4935532241fa9520b3ad91e4',
            'info_dict': {
                'id': 'b0b40906854d11e4bdad0025900fea04',
                'ext': 'mp4',
                'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
            }
        }, {
            'md5': '5f7652a08b05009c1292317b449ffea2',
            'info_dict': {
                'id': '420ad9ec854a11e4bdad0025900fea04',
                'ext': 'mp4',
                'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
            }
        }, {
            'md5': '498eb9dfa97169f409126c617e2a3d64',
            'info_dict': {
                'id': '95d35580846a11e4b6d20025900fea04',
                'ext': 'mp4',
                'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
            }
        }, {
            'md5': 'b8dc6b744844032dab6ba3781a7274b9',
            'info_dict': {
                'id': '6fe14d66853511e4833a0025900fea04',
                'ext': 'mp4',
                'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
            }
        }],
    }, {
        'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
        'only_matching': True,
    }]

    def _parse_video_metadata(self, js, video_id):
        """Turn one JavaScript asset object into an info dict.

        ``js`` is the JavaScript object literal found in the page; it is
        converted to JSON with ``js_to_json`` before parsing. ``video_id`` is
        only used for reporting while parsing.
        """
        metadata = self._parse_json(js, video_id, transform_source=js_to_json)

        formats = []
        for video in metadata['sources']:
            # Drops the first six characters of the type string
            # (presumably a "video/" MIME prefix - TODO confirm).
            ext = video['type'][6:]
            label = video['label']
            # Take the leading digits of the label ("720p" -> 720). A label
            # without leading digits leaves the height unknown instead of
            # aborting the whole extraction with a ValueError.
            height_mobj = re.match(r'\d+', label)
            formats.append({
                'url': video['file'],
                'ext': ext,
                'format_id': '%s-%s' % (ext, label),
                'height': int(height_mobj.group(0)) if height_mobj else None,
                'fps': 25,
            })

        self._sort_formats(formats)

        return {
            'id': metadata['mediaid'],
            'title': unescapeHTML(metadata['title']),
            'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
            'formats': formats
        }

    def _real_extract(self, url):
        """Return a single video or a playlist, whichever the page contains.

        Raises ExtractorError when the page has neither form of embed data.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # single video
        item = self._search_regex(
            r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
            webpage, 'video', default=None, fatal=False)

        if item:
            return self._parse_video_metadata(item, video_id)

        # playlist
        items = re.findall(
            r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
            webpage)

        if items:
            return {
                '_type': 'playlist',
                'id': video_id,
                'title': self._og_search_title(webpage),
                'entries': [self._parse_video_metadata(i, video_id) for i in items]
            }

        raise ExtractorError('Could not find either a video or a playlist')

View File

@@ -0,0 +1,46 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class EchoMskIE(InfoExtractor):
    """Extractor for audio recordings on echo.msk.ru ``/sounds/`` pages."""
    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.echo.msk.ru/sounds/1464134.html',
        'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
        'info_dict': {
            'id': '1464134',
            'ext': 'mp3',
            'title': 'Особое мнение - 29 декабря 2014, 19:08',
        },
    }

    def _real_extract(self, url):
        """Download the page and build the info dict for its mp3 link."""
        sound_id = self._match_id(url)
        page = self._download_webpage(url, sound_id)

        mp3_url = self._search_regex(
            r'<a rel="mp3" href="([^"]+)">', page, 'audio URL')

        # The programme name is the base of the title.
        full_title = self._html_search_regex(
            r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
            page, 'title')

        # The air date is optional; when present it is appended to the title.
        raw_date = self._html_search_regex(
            r'(?s)<div class="date">(.+?)</div>',
            page, 'date', fatal=False, default=None)
        if raw_date:
            # Collapse every run of one repeated whitespace character into a
            # single occurrence of that character.
            squeezed_date = re.sub(r'(\s)\1+', r'\1', raw_date)
            full_title = '%s - %s' % (full_title, squeezed_date)

        return {
            'id': sound_id,
            'url': mp3_url,
            'title': full_title,
        }

View File

@@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@@ -12,32 +11,49 @@ from ..utils import (
class EllenTVIE(InfoExtractor): class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)' _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
_TEST = { _TESTS = [{
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
'md5': 'e4af06f3bf0d5f471921a18db5764642', 'md5': 'e4af06f3bf0d5f471921a18db5764642',
'info_dict': { 'info_dict': {
'id': '0-7jqrsr18', 'id': '0-7jqrsr18',
'ext': 'mp4', 'ext': 'mp4',
'title': 'What\'s Wrong with These Photos? A Whole Lot', 'title': 'What\'s Wrong with These Photos? A Whole Lot',
'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
'timestamp': 1406876400, 'timestamp': 1406876400,
'upload_date': '20140801', 'upload_date': '20140801',
} }
} }, {
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
'info_dict': {
'id': '0-dvzmabd5',
'ext': 'mp4',
'title': '1 year old twin sister makes her brother laugh',
'description': '1 year old twin sister makes her brother laugh',
'timestamp': 1419542075,
'upload_date': '20141225',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._html_search_meta('VideoURL', webpage, 'url')
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'description') or self._og_search_description(webpage)
timestamp = parse_iso8601(self._search_regex( timestamp = parse_iso8601(self._search_regex(
r'<span class="publish-date"><time datetime="([^"]+)">', r'<span class="publish-date"><time datetime="([^"]+)">',
webpage, 'timestamp')) webpage, 'timestamp'))
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'url': video_url,
'url': self._html_search_meta('VideoURL', webpage, 'url'), 'title': title,
'description': description,
'timestamp': timestamp, 'timestamp': timestamp,
} }
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) playlist_id = self._match_id(url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
playlist = self._extract_playlist(webpage) playlist = self._extract_playlist(webpage)

View File

@@ -0,0 +1,45 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class EroProfileIE(InfoExtractor):
    """Extractor for eroprofile.com video view pages."""
    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'sexy babe softcore',
            'thumbnail': 're:https?://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Scrape id, media URL, title and thumbnail from the view page."""
        # The URL only carries a slug; the numeric id is taken from the page.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Two alternative patterns are tried for the numeric id; it stays
        # None when neither of them matches.
        id_patterns = [
            r"glbUpdViews\s*\('\d*','(\d+)'",
            r'p/report/video/(\d+)',
        ]
        numeric_id = self._search_regex(
            id_patterns, page, 'video id', default=None)

        media_url = self._search_regex(
            r'<source src="([^"]+)', page, 'video url')
        page_title = self._html_search_regex(
            r'Title:</th><td>([^<]+)</td>', page, 'title')
        # The thumbnail is optional (non-fatal lookup).
        thumb_url = self._search_regex(
            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
            page, 'thumbnail', fatal=False)

        info = {
            'id': numeric_id,
            'display_id': slug,
            'url': media_url,
            'title': page_title,
            'thumbnail': thumb_url,
            'age_limit': 18,
        }
        return info

View File

@@ -6,7 +6,9 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
xpath_with_ns, xpath_with_ns,
parse_iso8601 parse_iso8601,
float_or_none,
int_or_none,
) )
NAMESPACE_MAP = { NAMESPACE_MAP = {
@@ -21,21 +23,38 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
class GameOneIE(InfoExtractor): class GameOneIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
_TEST = { _TESTS = [
'url': 'http://www.gameone.de/tv/288', {
'md5': '136656b7fb4c9cb4a8e2d500651c499b', 'url': 'http://www.gameone.de/tv/288',
'info_dict': { 'md5': '136656b7fb4c9cb4a8e2d500651c499b',
'id': '288', 'info_dict': {
'ext': 'mp4', 'id': '288',
'title': 'Game One - Folge 288', 'ext': 'mp4',
'duration': 1238, 'title': 'Game One - Folge 288',
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'duration': 1238,
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
'age_limit': 16, 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
'upload_date': '20140513', 'age_limit': 16,
'timestamp': 1399980122, 'upload_date': '20140513',
'timestamp': 1399980122,
}
},
{
'url': 'http://gameone.de/tv/220',
'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
'info_dict': {
'id': '220',
'ext': 'mp4',
'upload_date': '20120918',
'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
'timestamp': 1347971451,
'title': 'Game One - Folge 220',
'duration': 896.62,
'age_limit': 16,
}
} }
}
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -66,13 +85,13 @@ class GameOneIE(InfoExtractor):
video_id, video_id,
'Downloading media:content') 'Downloading media:content')
rendition_items = content.findall('.//rendition') rendition_items = content.findall('.//rendition')
duration = int(rendition_items[0].get('duration')) duration = float_or_none(rendition_items[0].get('duration'))
formats = [ formats = [
{ {
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
'width': int(r.get('width')), 'width': int_or_none(r.get('width')),
'height': int(r.get('height')), 'height': int_or_none(r.get('height')),
'tbr': int(r.get('bitrate')), 'tbr': int_or_none(r.get('bitrate')),
} }
for r in rendition_items for r in rendition_items
] ]
@@ -105,7 +124,8 @@ class GameOnePlaylistIE(InfoExtractor):
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV') webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage))) max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
entries = [ entries = [
self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne') self.url_result('http://www.gameone.de/tv/%d' %
video_id, 'GameOne')
for video_id in range(max_id, 0, -1)] for video_id in range(max_id, 0, -1)]
return { return {

View File

@@ -23,6 +23,7 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
UnsupportedError,
url_basename, url_basename,
) )
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
@@ -180,6 +181,14 @@ class GenericIE(InfoExtractor):
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
}, },
}, },
# BBC iPlayer embeds
{
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
'info_dict': {
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
},
'playlist_mincount': 18,
},
# RUTV embed # RUTV embed
{ {
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -698,9 +707,9 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Helper method # Helper method
def _playlist_from_matches(matches, getter, ie=None): def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet( urlrs = orderedSet(
self.url_result(self._proto_relative_url(getter(m)), ie) self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches) for m in matches)
return self.playlist_result( return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title) urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -904,6 +913,11 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches( return _playlist_from_matches(
matches, getter=unescapeHTML, ie='FunnyOrDie') matches, getter=unescapeHTML, ie='FunnyOrDie')
# Look for BBC iPlayer embed
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
if matches:
return _playlist_from_matches(matches, ie='BBCCoUk')
# Look for embedded RUTV player # Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage) rutv_url = RUTVIE._extract_url(webpage)
if rutv_url: if rutv_url:
@@ -1057,7 +1071,7 @@ class GenericIE(InfoExtractor):
'url': new_url, 'url': new_url,
} }
if not found: if not found:
raise ExtractorError('Unsupported URL: %s' % url) raise UnsupportedError(url)
entries = [] entries = []
for video_url in found: for video_url in found:

View File

@@ -0,0 +1,71 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
remove_end,
)
class HellPornoIE(InfoExtractor):
    """Extractor for hellporno.com video pages."""
    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
        'md5': '1fee339c610d2049699ef2aa699439f1',
        'info_dict': {
            'id': '149116',
            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
            'ext': 'mp4',
            'title': 'Dixie is posing with naked ass very erotic',
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict from the page's ``flashvars`` object."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = remove_end(self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')

        flashvars = self._parse_json(self._search_regex(
            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            display_id, transform_source=js_to_json)

        video_id = flashvars.get('video_id')
        thumbnail = flashvars.get('preview_url')
        # 'postfix' holds the extension with a leading character that is
        # dropped here (".mp4" -> "mp4").
        ext = flashvars.get('postfix', '.mp4')[1:]

        formats = []
        for video_url_key in ['video_url', 'video_alt_url']:
            video_url = flashvars.get(video_url_key)
            if not video_url:
                continue
            # Optional human-readable label stored next to each URL.
            video_text = flashvars.get('%s_text' % video_url_key)
            fmt = {
                'url': video_url,
                'ext': ext,
                'format_id': video_text,
            }
            # The label may be absent; re.search() raises TypeError on None,
            # so fall back to an empty string and simply find no height.
            m = re.search(r'^(?P<height>\d+)[pP]', video_text or '')
            if m:
                fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        # Splitting an empty string would yield the bogus list [''].
        categories = keywords.split(',') if keywords else None

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
            'formats': formats,
        }

View File

@@ -0,0 +1,166 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
parse_iso8601,
float_or_none,
int_or_none,
compat_str,
)
class HitboxIE(InfoExtractor):
    """Extractor for recorded videos on hitbox.tv."""
    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Download ``<url>/<video_id>`` as JSON and map it to an info dict.

        The returned dict holds the metadata fields only; callers add the
        media URL or formats themselves.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id)

        # Livestream is the default; recorded videos live under another key
        # and use another date field.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        # NOTE: raises IndexError when the response has no entry of this type.
        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')
        category = video_meta.get('category_name')
        # Avoid reporting the bogus list [None] when no category is given.
        categories = [category] if category else None

        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            # A missing path would make the concatenation below raise
            # TypeError, so such thumbnails are skipped.
            if thumb_path:
                thumbs.append({
                    'url': thumb_base + thumb_path,
                    'width': width,
                    'height': height,
                })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Combine the API metadata with the player config's media URL."""
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id)

        clip = player_config.get('clip')
        video_url = clip.get('url')
        # The resolution label is optional: an absent or empty bitrate list
        # leaves it unset instead of raising IndexError.
        bitrates = clip.get('bitrates') or []
        res = bitrates[0].get('label') if bitrates else None

        metadata['resolution'] = res
        metadata['url'] = video_url
        metadata['protocol'] = 'm3u8'

        return metadata
class HitboxLiveIE(HitboxIE):
    """Extractor for live hitbox.tv channels.

    Matches any hitbox.tv path that does not start with ``video`` and
    exposes one RTMP format per CDN bitrate.
    """
    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the channel metadata together with its live RTMP formats."""
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        # Several CDN entries may point at the same server; list each once.
        seen_servers = set()
        for cdn in player_config.get('cdns') or []:
            base_url = cdn.get('netConnectionUrl')
            if not base_url or base_url in seen_servers:
                continue
            seen_servers.add(base_url)

            # Last two labels of the host name, used only as a format note.
            host_mobj = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_mobj.group(1) if host_mobj else None

            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                # 'Auto' is a player-side pseudo entry, not a real stream.
                if label == 'Auto':
                    continue
                formats.append({
                    'url': '%s/%s' % (base_url, stream.get('url')),
                    'ext': 'mp4',
                    'vbr': stream.get('bitrate'),
                    'resolution': label,
                    'rtmp_live': True,
                    'format_note': host,
                    'page_url': url,
                    'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                })

        self._sort_formats(formats)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))
        return metadata

View File

@@ -22,7 +22,7 @@ class InternetVideoArchiveIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'SKYFALL', 'title': 'SKYFALL',
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', 'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
'duration': 149, 'duration': 152,
}, },
} }

View File

@@ -10,13 +10,14 @@ from ..utils import int_or_none
class KontrTubeIE(InfoExtractor): class KontrTubeIE(InfoExtractor):
IE_NAME = 'kontrtube' IE_NAME = 'kontrtube'
IE_DESC = 'KontrTube.ru - Труба зовёт' IE_DESC = 'KontrTube.ru - Труба зовёт'
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+' _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
_TEST = { _TEST = {
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
'md5': '975a991a4926c9a85f383a736a2e6b80', 'md5': '975a991a4926c9a85f383a736a2e6b80',
'info_dict': { 'info_dict': {
'id': '2678', 'id': '2678',
'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Над олимпийской деревней в Сочи поднят российский флаг', 'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, video_id, 'Downloading page') webpage = self._download_webpage(
url, display_id, 'Downloading page')
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') video_url = self._html_search_regex(
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex( title = self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'video title') r'<title>(.+?)</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description') description = self._html_search_meta(
'description', webpage, 'video description')
mobj = re.search( mobj = re.search(
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage) r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex( view_count = self._html_search_regex(
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False) r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
webpage, 'view count', fatal=False)
comment_count = None comment_count = None
comment_str = self._html_search_regex( comment_str = self._html_search_regex(
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id,
'url': video_url, 'url': video_url,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'title': title, 'title': title,

View File

@@ -17,7 +17,7 @@ from ..utils import (
class TechTVMITIE(InfoExtractor): class TechTVMITIE(InfoExtractor):
IE_NAME = 'techtv.mit.edu' IE_NAME = 'techtv.mit.edu'
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
@@ -31,8 +31,7 @@ class TechTVMITIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
raw_page = self._download_webpage( raw_page = self._download_webpage(
'http://techtv.mit.edu/videos/%s' % video_id, video_id) 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
@@ -106,7 +105,7 @@ class OCWMITIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence', 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.', 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
#'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt' # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
} }
}, },
{ {
@@ -116,7 +115,7 @@ class OCWMITIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Session 1: Introduction to Derivatives', 'title': 'Session 1: Introduction to Derivatives',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
#'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT' # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
} }
} }
] ]

View File

@@ -52,7 +52,8 @@ class MoeVideoIE(InfoExtractor):
'height': 296, 'height': 296,
'duration': 6027, 'duration': 6027,
'filesize': 588257923, 'filesize': 588257923,
} },
'skip': 'Video has been removed',
}, },
] ]

View File

@@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@@ -64,8 +63,7 @@ class MooshareIE(InfoExtractor):
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) 'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.to_screen('%s: Waiting for timeout' % video_id) self._sleep(5, video_id)
time.sleep(5)
video_page = self._download_webpage(request, video_id, 'Downloading video page') video_page = self._download_webpage(request, video_id, 'Downloading video page')

View File

@@ -0,0 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
from .common import InfoExtractor
class NerdCubedFeedIE(InfoExtractor):
    """Expose the nerdcubed.co.uk JSON feed as a playlist of YouTube URLs."""
    _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
    _TEST = {
        'url': 'http://www.nerdcubed.co.uk/feed.json',
        'info_dict': {
            'title': 'nerdcubed.co.uk feed',
        },
        'playlist_mincount': 1300,
    }

    def _real_extract(self, url):
        """Download the feed and emit one `url` entry per feed item."""
        feed = self._download_json(url, url, "Downloading NerdCubed JSON feed")

        entries = []
        for item in feed:
            title = item['title']
            source = item['source']
            # Feed dates are YYYY-MM-DD; upload_date is written as YYYYMMDD.
            published = datetime.datetime.strptime(item['date'], '%Y-%m-%d')
            entries.append({
                '_type': 'url',
                'title': title,
                'uploader': source['name'] if source else None,
                'upload_date': published.strftime('%Y%m%d'),
                'url': "http://www.youtube.com/watch?v=" + item['youtube_id'],
            })

        return {
            '_type': 'playlist',
            'title': 'nerdcubed.co.uk feed',
            'id': 'nerdcubed-feed',
            'entries': entries,
        }

View File

@@ -54,7 +54,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
class NHLIE(NHLBaseInfoExtractor): class NHLIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com' IE_NAME = 'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)' _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',

View File

@@ -9,6 +9,7 @@ from ..utils import (
qualities, qualities,
strip_jsonp, strip_jsonp,
url_basename, url_basename,
fix_xml_ampersands,
) )
@@ -51,7 +52,21 @@ class NPOIE(InfoExtractor):
'upload_date': '20130225', 'upload_date': '20130225',
'duration': 3000, 'duration': 3000,
}, },
} },
{
'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
'info_dict': {
'id': 'WO_VPRO_043706',
'ext': 'wmv',
'title': 'De nieuwe mens - Deel 1',
'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
'duration': 4680,
},
'params': {
# mplayer mms download
'skip_download': True,
}
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -74,31 +89,58 @@ class NPOIE(InfoExtractor):
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token') token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
formats = [] formats = []
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
for format_id in metadata['pubopties']: pubopties = metadata.get('pubopties')
format_info = self._download_json( if pubopties:
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token), quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
video_id, 'Downloading %s JSON' % format_id) for format_id in pubopties:
if format_info.get('error_code', 0) or format_info.get('errorcode', 0): format_info = self._download_json(
continue 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
streams = format_info.get('streams') % (video_id, format_id, token),
if streams: video_id, 'Downloading %s JSON' % format_id)
video_info = self._download_json( if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
streams[0] + '&type=json', continue
video_id, 'Downloading %s stream JSON' % format_id) streams = format_info.get('streams')
else: if streams:
video_info = format_info video_info = self._download_json(
video_url = video_info.get('url') streams[0] + '&type=json',
if not video_url: video_id, 'Downloading %s stream JSON' % format_id)
continue else:
if format_id == 'adaptive': video_info = format_info
formats.extend(self._extract_m3u8_formats(video_url, video_id)) video_url = video_info.get('url')
else: if not video_url:
continue
if format_id == 'adaptive':
formats.extend(self._extract_m3u8_formats(video_url, video_id))
else:
formats.append({
'url': video_url,
'format_id': format_id,
'quality': quality(format_id),
})
streams = metadata.get('streams')
if streams:
for i, stream in enumerate(streams):
stream_url = stream.get('url')
if not stream_url:
continue
asx = self._download_xml(
stream_url, video_id,
'Downloading stream %d ASX playlist' % i,
transform_source=fix_xml_ampersands)
ref = asx.find('./ENTRY/Ref')
if ref is None:
continue
video_url = ref.get('href')
if not video_url:
continue
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': format_id, 'ext': stream.get('formaat', 'asf'),
'quality': quality(format_id), 'quality': stream.get('kwaliteit'),
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@@ -77,7 +77,7 @@ class NRKTVIE(InfoExtractor):
_TESTS = [ _TESTS = [
{ {
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '7b96112fbae1faf09a6f9ae1aff6cb84', 'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
'info_dict': { 'info_dict': {
'id': 'MUHH48000314', 'id': 'MUHH48000314',
'ext': 'flv', 'ext': 'flv',
@@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor):
}, },
{ {
'url': 'http://tv.nrk.no/program/mdfp15000514', 'url': 'http://tv.nrk.no/program/mdfp15000514',
'md5': 'af01795a31f1cf7265c8657534d8077b', 'md5': '383650ece2b25ecec996ad7b5bb2a384',
'info_dict': { 'info_dict': {
'id': 'mdfp15000514', 'id': 'mdfp15000514',
'ext': 'flv', 'ext': 'flv',

View File

@@ -0,0 +1,70 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
compat_urllib_parse,
parse_age_limit,
int_or_none,
)
class OpenFilmIE(InfoExtractor):
    """Extractor for openfilm.com video pages."""
    # The "www." prefix is optional and both http and https are accepted.
    _VALID_URL = r'https?://(?:www\.)?openfilm\.com/videos/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
        'info_dict': {
            'id': '32736',
            'display_id': 'human-resources-remastered',
            'ext': 'mp4',
            'title': 'Human Resources (Remastered)',
            'description': 'Social Engineering in the 20th Century.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 7164,
            'timestamp': 1334756988,
            'upload_date': '20120418',
            'uploader_id': '41117',
            'view_count': int,
            'age_limit': 0,
        },
    }

    def _real_extract(self, url):
        """Read the video description JSON embedded in the og:video URL.

        The page's og:video player URL carries a URL-encoded ``p=`` query
        parameter holding a JSON object; all fields are taken from it.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        player = compat_urllib_parse.unquote_plus(
            self._og_search_video_url(webpage))

        # _parse_json reports malformed JSON as an extractor error instead
        # of letting a bare ValueError escape.
        video = self._parse_json(
            self._search_regex(
                r'\bp=({.+?})(?:&|$)', player, 'video JSON'),
            display_id)

        # 'location' is a URL prefix; the file itself is '<location>1.mp4'.
        video_url = '%s1.mp4' % video['location']
        video_id = video.get('video_id')
        display_id = video.get('alias') or display_id
        title = video.get('title')
        description = video.get('description')
        thumbnail = video.get('main_thumb')
        duration = int_or_none(video.get('duration'))
        timestamp = parse_iso8601(video.get('dt_published'), ' ')
        uploader_id = video.get('user_id')
        view_count = int_or_none(video.get('views_count'))
        age_limit = parse_age_limit(video.get('age_limit'))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'age_limit': age_limit,
        }

View File

@@ -17,9 +17,9 @@ from ..utils import (
class ORFTVthekIE(InfoExtractor): class ORFTVthekIE(InfoExtractor):
IE_NAME = 'orf:tvthek' IE_NAME = 'orf:tvthek'
IE_DESC = 'ORF TVthek' IE_DESC = 'ORF TVthek'
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
'playlist': [{ 'playlist': [{
'md5': '2942210346ed779588f428a92db88712', 'md5': '2942210346ed779588f428a92db88712',
@@ -32,8 +32,21 @@ class ORFTVthekIE(InfoExtractor):
'upload_date': '20141208', 'upload_date': '20141208',
}, },
}], }],
'skip': 'Blocked outside of Austria', 'skip': 'Blocked outside of Austria / Germany',
} }, {
'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
'playlist': [{
'md5': '68f543909aea49d621dfc7703a11cfaf',
'info_dict': {
'id': '7982259',
'ext': 'mp4',
'title': 'Best of Ingrid Thurnher',
'upload_date': '20140527',
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
}
}],
'_skip': 'Blocked outside of Austria / Germany',
}]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
@@ -45,7 +58,9 @@ class ORFTVthekIE(InfoExtractor):
def get_segments(all_data): def get_segments(all_data):
for data in all_data: for data in all_data:
if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': if data['name'] in (
'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
return data['values']['segments'] return data['values']['segments']
sdata = get_segments(all_data) sdata = get_segments(all_data)

View File

@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
unified_strdate, unified_strdate,
US_RATINGS, US_RATINGS,
) )
@@ -151,6 +152,19 @@ class PBSIE(InfoExtractor):
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id) info = self._download_json(info_url, display_id)
redirect_url = info['alternate_encoding']['url']
redirect_info = self._download_json(
redirect_url + '?format=json', display_id,
'Downloading video url info')
if redirect_info['status'] == 'error':
if redirect_info['http_code'] == 403:
message = (
'The video is not available in your region due to '
'right restrictions')
else:
message = redirect_info['message']
raise ExtractorError(message, expected=True)
rating_str = info.get('rating') rating_str = info.get('rating')
if rating_str is not None: if rating_str is not None:
rating_str = rating_str.rpartition('-')[2] rating_str = rating_str.rpartition('-')[2]
@@ -160,7 +174,7 @@ class PBSIE(InfoExtractor):
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': info['title'], 'title': info['title'],
'url': info['alternate_encoding']['url'], 'url': redirect_info['url'],
'ext': 'mp4', 'ext': 'mp4',
'description': info['program'].get('description'), 'description': info['program'].get('description'),
'thumbnail': info.get('image_url'), 'thumbnail': info.get('image_url'),

View File

@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
'ext': 'flv', 'ext': 'flv',
'title': 'youtube-dl_test_video.mp4', 'title': 'youtube-dl_test_video.mp4',
}, },
'skip': 'Removed for copyright infringement.', # oh wow
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@@ -8,11 +8,11 @@ from ..utils import js_to_json
class RTPIE(InfoExtractor): class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/e(?P<id>[0-9]+)/?' _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_TEST = { _TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
'info_dict': { 'info_dict': {
'id': '174042', 'id': 'e174042',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Paixões Cruzadas', 'title': 'Paixões Cruzadas',
'description': 'As paixões musicais de António Cartaxo e António Macedo', 'description': 'As paixões musicais de António Cartaxo e António Macedo',
@@ -21,7 +21,10 @@ class RTPIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, # RTMP download 'skip_download': True, # RTMP download
}, },
} }, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
js_to_json,
)
class ScreencastOMaticIE(InfoExtractor):
    """Extractor for screencast-o-matic.com watch pages."""
    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
        }
    }

    def _real_extract(self, url):
        """Parse the jwplayer setup call and return its html5 mode file."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player configuration is a JS object literal, not strict JSON.
        setup_js = self._search_regex(
            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
            webpage, 'setup code')
        player_setup = self._parse_json(
            setup_js, video_id, transform_source=js_to_json)

        # Use the first playback mode declared as html5.
        for mode in player_setup['modes']:
            if mode.get('type') == 'html5':
                html5_mode = mode
                break
        else:
            raise ExtractorError('Could not find any video entries!')

        # The configured file path may be relative to the page URL.
        media_url = compat_urlparse.urljoin(url, html5_mode['config']['file'])

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'url': media_url,
            'ext': 'mp4',
            'thumbnail': player_setup.get('image'),
        }

View File

@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
page_title = mobj.group('title') page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title) webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex( slideshare_obj = self._search_regex(
r'var slideshare_object = ({.*?}); var user_info =', r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
webpage, 'slideshare object') webpage, 'slideshare object')
info = json.loads(slideshare_obj) info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video': if info['slideshow']['type'] != 'video':
@@ -41,7 +41,7 @@ class SlideshareIE(InfoExtractor):
ext = info['jsplayer']['video_extension'] ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex( description = self._html_search_regex(
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage, r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False) 'description', fatal=False)
return { return {

View File

@@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor):
'params': { 'params': {
'videopassword': 'qwerty', 'videopassword': 'qwerty',
}, },
'skip': 'Video is not approved by moderator',
}, },
# age limit + video-password # age limit + video-password
{ {
@@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor):
}, },
'params': { 'params': {
'videopassword': '333' 'videopassword': '333'
} },
'skip': 'Video is not approved by moderator',
}, },
# swf player # swf player
{ {

View File

@@ -1,11 +1,10 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from .common import compat_str
class SohuIE(InfoExtractor): class SohuIE(InfoExtractor):
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
else: else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage( return self._download_json(
data_url, video_id, base_data_url + vid_id, video_id,
note='Downloading JSON data for ' + str(vid_id)) 'Downloading JSON data for %s' % vid_id)
return json.loads(data_json)
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
mytv = mobj.group('mytv') is not None mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>', raw_title = self._html_search_regex(
webpage, 'video title') r'(?s)<title>(.+?)</title>',
webpage, 'video title')
title = raw_title.partition('-')[0].strip() title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage, vid = self._html_search_regex(
'video path') r'var vid ?= ?["\'](\d+)["\']',
data = _fetch_data(vid, mytv) webpage, 'video path')
vid_data = _fetch_data(vid, mytv)
QUALITIES = ('ori', 'super', 'high', 'nor') formats_json = {}
vid_ids = [data['data'][q + 'Vid'] for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
for q in QUALITIES vid_id = vid_data['data'].get('%sVid' % format_id)
if data['data'][q + 'Vid'] != 0] if not vid_id:
if not vid_ids: continue
raise ExtractorError('No formats available for this video') vid_id = compat_str(vid_id)
formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
# For now, we just pick the highest available quality part_count = vid_data['data']['totalBlocks']
vid_id = vid_ids[-1]
format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
part_count = format_data['data']['totalBlocks']
allot = format_data['allot']
prot = format_data['prot']
clipsURL = format_data['data']['clipsURL']
su = format_data['data']['su']
playlist = [] playlist = []
for i in range(part_count): for i in range(part_count):
part_url = ('http://%s/?prot=%s&file=%s&new=%s' % formats = []
(allot, prot, clipsURL[i], su[i])) for format_id, format_data in formats_json.items():
part_str = self._download_webpage( allot = format_data['allot']
part_url, video_id, prot = format_data['prot']
note='Downloading part %d of %d' % (i + 1, part_count))
part_info = part_str.split('|') data = format_data['data']
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) clips_url = data['clipsURL']
su = data['su']
video_info = { part_str = self._download_webpage(
'id': '%s_part%02d' % (video_id, i + 1), 'http://%s/?prot=%s&file=%s&new=%s' %
(allot, prot, clips_url[i], su[i]),
video_id,
'Downloading %s video URL part %d of %d'
% (format_id, i + 1, part_count))
part_info = part_str.split('|')
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
formats.append({
'url': video_url,
'format_id': format_id,
'filesize': data['clipsBytes'][i],
'width': data['width'],
'height': data['height'],
'fps': data['fps'],
})
self._sort_formats(formats)
playlist.append({
'id': '%s_part%d' % (video_id, i + 1),
'title': title, 'title': title,
'url': video_url, 'duration': vid_data['data']['clipsDuration'][i],
'ext': 'mp4', 'formats': formats,
} })
playlist.append(video_info)
if len(playlist) == 1: if len(playlist) == 1:
info = playlist[0] info = playlist[0]

View File

@@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):
categories = list(data.get('section', {}).get('tags', {}).values()) categories = list(data.get('section', {}).get('tags', {}).values())
asset = data['asset'] asset = data['asset']
assets_info = self._download_json(asset['url'], video_id)
formats = [] formats = []
smil_url = asset['video'] smil_url = assets_info['video']
if '.smil' in smil_url: if '.smil' in smil_url:
m3u8_url = smil_url.replace('.smil', '.m3u8') m3u8_url = smil_url.replace('.smil', '.m3u8')
formats.extend( formats.extend(

View File

@@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@@ -40,8 +39,7 @@ class StreamcloudIE(InfoExtractor):
''', orig_webpage) ''', orig_webpage)
post = compat_urllib_parse.urlencode(fields) post = compat_urllib_parse.urlencode(fields)
self.to_screen('%s: Waiting for timeout' % video_id) self._sleep(12, video_id)
time.sleep(12)
headers = { headers = {
b'Content-Type': b'application/x-www-form-urlencoded', b'Content-Type': b'application/x-www-form-urlencoded',
} }

View File

@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') title = self._html_search_regex(
description = self._html_search_meta('description', webpage, 'description') r'<title>([^<]+)</title>', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False) r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False)) r'itemprop="duration">\s*(\d+:\d+)\s*<',
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(
r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False)) r'class="views">\s*(\d+)\s*<',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex( comment_count = int_or_none(self._html_search_regex(
r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False)) r'(\d+)</b> Comments?',
webpage, 'comment count', fatal=False))
formats = [] formats = []
quality = qualities(['mp4', 'flv']) quality = qualities(['mp4', 'flv'])

View File

@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
class TelecincoIE(MiTeleIE): class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es' IE_NAME = 'telecinco.es'
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html' _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
_TEST = { _TEST = {
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',

View File

@@ -0,0 +1,53 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class TeleTaskIE(InfoExtractor):
    """Extractor for tele-task.de HTML5 lecture archive pages.

    A lecture page is returned as a playlist: each ``<video>`` element
    matched on the page becomes one entry whose id is the lecture id
    suffixed with that element's ``class`` attribute.
    """

    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.tele-task.de/archive/video/html5/26168/',
        'info_dict': {
            'title': 'Duplicate Detection',
        },
        'playlist': [{
            'md5': '290ef69fb2792e481169c3958dbfbd57',
            'info_dict': {
                'id': '26168-speaker',
                'ext': 'mp4',
                'title': 'Duplicate Detection',
                'upload_date': '20141218',
            }
        }, {
            'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
            'info_dict': {
                'id': '26168-slides',
                'ext': 'mp4',
                'title': 'Duplicate Detection',
                'upload_date': '20141218',
            }
        }]
    }

    def _real_extract(self, url):
        """Download the lecture page and return a playlist of its streams."""
        lecture_id = self._match_id(url)
        webpage = self._download_webpage(url, lecture_id)

        title = self._html_search_regex(
            r'itemprop="name">([^<]+)</a>', webpage, 'title')

        # The date lookup is non-fatal, so the raw value may be missing.
        raw_date = self._html_search_regex(
            r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False)
        upload_date = unified_strdate(raw_date)

        # Each match is (class attribute, first <source> URL) of a <video> tag.
        streams = re.findall(
            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)

        entries = []
        for stream_name, stream_url in streams:
            entries.append({
                'id': '%s-%s' % (lecture_id, stream_name),
                'url': stream_url,
                'title': title,
                'upload_date': upload_date,
            })

        return self.playlist_result(entries, lecture_id, title)

View File

@@ -8,7 +8,6 @@ class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
_TEST = { _TEST = {
'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
#'md5': 'd68703d9f73dc8fccf3320ab34202590',
'info_dict': { 'info_dict': {
'id': '2695695426001', 'id': '2695695426001',
'ext': 'flv', 'ext': 'flv',

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
import json import json
from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
) )
@@ -16,7 +16,7 @@ from ..utils import (
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
class ThePlatformIE(InfoExtractor): class ThePlatformIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -66,6 +66,20 @@ class ThePlatformIE(InfoExtractor):
info_json = self._download_webpage(info_url, video_id) info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json) info = json.loads(info_json)
subtitles = {}
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
lang, src = caption.get('lang'), caption.get('src')
if lang and src:
subtitles[lang] = src
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles)
head = meta.find(_x('smil:head')) head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body')) body = meta.find(_x('smil:body'))
@@ -117,6 +131,7 @@ class ThePlatformIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': info['title'], 'title': info['title'],
'subtitles': subtitles,
'formats': formats, 'formats': formats,
'description': info['description'], 'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'], 'thumbnail': info['defaultThumbnailUrl'],

View File

@@ -15,7 +15,7 @@ class TMZIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', 'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', 'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640', 'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
} }
} }

View File

@@ -12,7 +12,7 @@ from ..utils import (
class TNAFlixIE(InfoExtractor): class TNAFlixIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
_TITLE_REGEX = None _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>' _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
@@ -49,8 +49,8 @@ class TNAFlixIE(InfoExtractor):
if duration: if duration:
duration = parse_duration(duration[1:]) duration = parse_duration(duration[1:])
cfg_url = self._html_search_regex( cfg_url = self._proto_relative_url(self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config') self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
cfg_xml = self._download_xml( cfg_xml = self._download_xml(
cfg_url, display_id, note='Downloading metadata', cfg_url, display_id, note='Downloading metadata',

View File

@@ -22,6 +22,7 @@ class TwitchIE(InfoExtractor):
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/ _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
(?: (?:
(?P<channelid>[^/]+)| (?P<channelid>[^/]+)|
(?:(?:[^/]+)/v/(?P<vodid>[^/]+))|
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))| (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
) )
@@ -70,11 +71,24 @@ class TwitchIE(InfoExtractor):
def _extract_media(self, item, item_id): def _extract_media(self, item, item_id):
ITEMS = { ITEMS = {
'a': 'video', 'a': 'video',
'v': 'vod',
'c': 'chapter', 'c': 'chapter',
} }
info = self._extract_info(self._download_json( info = self._extract_info(self._download_json(
'%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
'Downloading %s info JSON' % ITEMS[item])) 'Downloading %s info JSON' % ITEMS[item]))
if item == 'v':
access_token = self._download_json(
'%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id,
'Downloading %s access token' % ITEMS[item])
formats = self._extract_m3u8_formats(
'http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s'
% (item_id, access_token['token'], access_token['sig']),
item_id, 'mp4')
info['formats'] = formats
return info
response = self._download_json( response = self._download_json(
'%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id, '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
'Downloading %s playlist JSON' % ITEMS[item]) 'Downloading %s playlist JSON' % ITEMS[item])
@@ -209,6 +223,8 @@ class TwitchIE(InfoExtractor):
""" """
elif mobj.group('videoid'): elif mobj.group('videoid'):
return self._extract_media('a', mobj.group('videoid')) return self._extract_media('a', mobj.group('videoid'))
elif mobj.group('vodid'):
return self._extract_media('v', mobj.group('vodid'))
elif mobj.group('channelid'): elif mobj.group('channelid'):
channel_id = mobj.group('channelid') channel_id = mobj.group('channelid')
info = self._download_json( info = self._download_json(

View File

@@ -0,0 +1,118 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class VierIE(InfoExtractor):
    """Extractor for single vier.be videos (programme pages and embeds)."""

    IE_NAME = 'vier'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
        'info_dict': {
            'id': '16129',
            'display_id': 'het-wordt-warm-de-moestuin',
            'ext': 'mp4',
            'title': 'Het wordt warm in De Moestuin',
            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
        'only_matching': True,
    }, {
        'url': 'http://www.vier.be/video/v3/embed/16129',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the page for the stream parameters and build the info dict."""
        match = re.match(self._VALID_URL, url)
        embed_id = match.group('embed_id')
        # Embed URLs carry no slug, so the embed id doubles as display id.
        display_id = match.group('display_id') or embed_id

        webpage = self._download_webpage(url, display_id)

        # The canonical video id is read from the page, not from the URL.
        video_id = self._search_regex(
            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
        application = self._search_regex(
            r'"application"\s*:\s*"([^"]+)"', webpage, 'application',
            default='vier_vod')
        filename = self._search_regex(
            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')

        m3u8_url = (
            'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8'
            % (application, filename))
        formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage, default=display_id),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
        }
class VierVideosIE(InfoExtractor):
    """Extractor for vier.be programme video listings.

    Without a ``page`` query parameter all listing pages are walked; with
    one, only that page is returned.
    """

    IE_NAME = 'vier:videos'
    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
    _TESTS = [{
        'url': 'http://www.vier.be/demoestuin/videos',
        'info_dict': {
            'id': 'demoestuin',
        },
        'playlist_mincount': 153,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=6',
        'info_dict': {
            'id': 'demoestuin-page6',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'http://www.vier.be/demoestuin/videos?page=7',
        'info_dict': {
            'id': 'demoestuin-page7',
        },
        'playlist_mincount': 13,
    }]

    def _real_extract(self, url):
        """Collect url_result entries for every video link on the listing."""
        match = re.match(self._VALID_URL, url)
        program = match.group('program')

        webpage = self._download_webpage(url, program)

        page_id = match.group('page')
        if page_id:
            # A specific page was requested: restrict the range to it.
            page_id = int(page_id)
            first_page = page_id
            end_page = first_page + 1
            playlist_id = '%s-page%d' % (program, page_id)
        else:
            # No page given: walk from page 0 up to the page number found
            # in the "laatste" pagination link (0 when the link is absent).
            first_page = 0
            last_page_number = self._search_regex(
                r'videos\?page=(\d+)">laatste</a>',
                webpage, 'last page', default=0)
            end_page = int(last_page_number) + 1
            playlist_id = program

        entries = []
        for page_number in range(first_page, end_page):
            if page_number == page_id:
                # The explicitly requested page has already been fetched.
                page_html = webpage
            else:
                page_html = self._download_webpage(
                    'http://www.vier.be/%s/videos?page=%d' % (program, page_number),
                    program,
                    'Downloading page %d' % (page_number + 1))
            video_paths = re.findall(
                r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', page_html)
            for video_path in video_paths:
                entries.append(
                    self.url_result('http://www.vier.be' + video_path, 'Vier'))

        return self.playlist_result(entries, playlist_id)

View File

@@ -1,46 +1,42 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_iso8601,
float_or_none,
int_or_none, int_or_none,
parse_filesize,
unified_strdate,
) )
class XboxClipsIE(InfoExtractor): class XboxClipsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})' _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
_TEST = { _TEST = {
'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325', 'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d', 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
'info_dict': { 'info_dict': {
'id': '074a69a9-5faf-46aa-b93b-9909c1720325', 'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Iabdulelah playing Upload Studio', 'title': 'Iabdulelah playing Titanfall',
'filesize_approx': 28101836.8, 'filesize_approx': 26800000,
'timestamp': 1407388500,
'upload_date': '20140807', 'upload_date': '20140807',
'duration': 56, 'duration': 56,
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'>Link: <a href="([^"]+)">', webpage, 'video URL') r'>(?:Link|Download): <a href="([^"]+)">', webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title') r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
timestamp = parse_iso8601(self._html_search_regex( upload_date = unified_strdate(self._html_search_regex(
r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False)) r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
filesize = float_or_none(self._html_search_regex( filesize = parse_filesize(self._html_search_regex(
r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024) r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
duration = int_or_none(self._html_search_regex( duration = int_or_none(self._html_search_regex(
r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False)) r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(
@@ -50,7 +46,7 @@ class XboxClipsIE(InfoExtractor):
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': title, 'title': title,
'timestamp': timestamp, 'upload_date': upload_date,
'filesize_approx': filesize, 'filesize_approx': filesize,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View File

@@ -14,7 +14,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster""" """Information Extractor for xHamster"""
_VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [ _TESTS = [
{ {
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
'duration': 200, 'duration': 200,
'age_limit': 18, 'age_limit': 18,
} }
} },
{
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
'only_matching': True,
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
seo = mobj.group('seo') seo = mobj.group('seo')
mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo) proto = mobj.group('proto')
mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id) webpage = self._download_webpage(mrss_url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title') title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')

View File

@@ -1,11 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_request, compat_urllib_request,
compat_urllib_parse,
) )
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
@@ -14,7 +14,7 @@ from ..utils import (
class XTubeIE(InfoExtractor): class XTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&]+))' _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&#]+))'
_TEST = { _TEST = {
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
@@ -30,41 +30,49 @@ class XTubeIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title') video_title = self._html_search_regex(
r'<p class="title">([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False) [r"var\s+contentOwnerId\s*=\s*'([^']+)",
r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
webpage, 'uploader', fatal=False)
video_description = self._html_search_regex( video_description = self._html_search_regex(
r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False) r'<p class="fieldsDesc">([^<]+)',
webpage, 'description', fatal=False)
duration = parse_duration(self._html_search_regex( duration = parse_duration(self._html_search_regex(
r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False)) r'<span class="bold">Runtime:</span> ([^<]+)</p>',
view_count = self._html_search_regex( webpage, 'duration', fatal=False))
r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False) view_count = str_to_int(self._html_search_regex(
if view_count: r'<span class="bold">Views:</span> ([\d,\.]+)</p>',
view_count = str_to_int(view_count) webpage, 'view count', fatal=False))
comment_count = self._html_search_regex( comment_count = str_to_int(self._html_search_regex(
r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False) r'<div id="commentBar">([\d,\.]+) Comments</div>',
if comment_count: webpage, 'comment count', fatal=False))
comment_count = str_to_int(comment_count)
player_quality_option = json.loads(self._html_search_regex( formats = []
r'playerQualityOption = ({.+?});', webpage, 'player quality option')) for format_id, video_url in re.findall(
r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage):
QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080'] fmt = {
formats = [ 'url': compat_urllib_parse.unquote(video_url),
{
'url': furl,
'format_id': format_id, 'format_id': format_id,
'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1, }
} for format_id, furl in player_quality_option.items() m = re.search(r'^(?P<height>\d+)[pP]', format_id)
] if m:
fmt['height'] = int(m.group('height'))
formats.append(fmt)
if not formats:
video_url = compat_urllib_parse.unquote(self._search_regex(
r'flashvars\.video_url\s*=\s*"([^"]+)"',
webpage, 'video URL'))
formats.append({'url': video_url})
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@@ -0,0 +1,81 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)
class XXXYMoviesIE(InfoExtractor):
    """Extractor for xxxymovies.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
        'info_dict': {
            'id': '138669',
            'display_id': 'ecstatic-orgasm-sofcore',
            'ext': 'mp4',
            'title': 'Ecstatic Orgasm Sofcore',
            'duration': 931,
            'categories': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Scrape the video page for the media URL and its metadata."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        display_id = match.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # Mandatory fields: extraction fails if either is missing.
        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
        title_patterns = [
            r'<div class="block_header">\s*<h1>([^<]+)</h1>',
            r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>',
        ]
        title = self._html_search_regex(title_patterns, webpage, 'title')

        # Optional fields: each lookup below is non-fatal.
        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'([^']+)'",
            webpage, 'thumbnail', fatal=False)

        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = keywords.split(',')

        duration_text = self._search_regex(
            r'<span>Duration:</span>\s*(\d+:\d+)',
            webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        views_text = self._html_search_regex(
            r'<div class="video_views">\s*(\d+)',
            webpage, 'view count', fatal=False)
        view_count = int_or_none(views_text)

        likes_text = self._search_regex(
            r'>\s*Likes? <b>\((\d+)\)',
            webpage, 'like count', fatal=False)
        like_count = int_or_none(likes_text)

        dislikes_text = self._search_regex(
            r'>\s*Dislike <b>\((\d+)\)</b>',
            webpage, 'dislike count', fatal=False)
        dislike_count = int_or_none(dislikes_text)

        age_limit = self._rta_search(webpage)

        info = {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'age_limit': age_limit,
        }
        return info

View File

@@ -12,6 +12,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
unescapeHTML,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
) )
@@ -55,14 +56,14 @@ class YahooIE(InfoExtractor):
} }
}, },
{ {
'url': 'https://tw.screen.yahoo.com/taipei-opinion-poll/選情站報-街頭民調-台北市篇-102823042.html', 'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html',
'md5': '92a7fdd8a08783c68a174d7aa067dde8', 'md5': '3a09cf59349cfaddae1797acc3c087fc',
'info_dict': { 'info_dict': {
'id': '7a23b569-7bea-36cb-85b9-bd5301a0a1fb', 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
'ext': 'mp4', 'ext': 'mp4',
'title': '選情站報 街頭民調 台北市篇', 'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
'description': '選情站報 街頭民調 台北市篇', 'description': '直言台南沒捷運 交通居五都之末',
'duration': 429, 'duration': 396,
} }
}, },
{ {
@@ -87,14 +88,14 @@ class YahooIE(InfoExtractor):
'duration': 121, 'duration': 121,
} }
}, { }, {
'url': 'https://ca.finance.yahoo.com/news/20-most-valuable-brands-world-112600775.html', 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
'md5': '3e401e4eed6325aa29d9b96125fd5b4f', 'md5': '226a895aae7e21b0129e2a2006fe9690',
'info_dict': { 'info_dict': {
'id': 'c1b4c09c-8ed8-3b65-8b05-169c55358a83', 'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
'ext': 'mp4', 'ext': 'mp4',
'title': "Apple Is The World's Most Valuable Brand", 'title': '\'The Interview\' TV Spot: War',
'description': 'md5:73eabc1a11c6f59752593b2ceefa1262', 'description': 'The Interview',
'duration': 21, 'duration': 30,
} }
}, { }, {
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
@@ -116,6 +117,16 @@ class YahooIE(InfoExtractor):
'description': 'md5:1428185051cfd1949807ad4ff6d3686a', 'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
'duration': 201, 'duration': 201,
} }
}, {
'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
'md5': '989396ae73d20c6f057746fb226aa215',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': '\'True Story\' Trailer',
'description': 'True Story',
'duration': 150,
},
}, { }, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True, 'only_matching': True,
@@ -125,6 +136,7 @@ class YahooIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
page_id = mobj.group('id')
url = mobj.group('url') url = mobj.group('url')
host = mobj.group('host') host = mobj.group('host')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@@ -149,6 +161,7 @@ class YahooIE(InfoExtractor):
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
r'"first_videoid"\s*:\s*"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
] ]
video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
else: else:
@@ -163,17 +176,15 @@ class YahooIE(InfoExtractor):
region = self._search_regex( region = self._search_regex(
r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
webpage, 'region', fatal=False, default='US') webpage, 'region', fatal=False, default='US')
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="%s"'
' AND protocol="http"' % (video_id, region))
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'q': query, 'protocol': 'http',
'env': 'prod', 'region': region,
'format': 'json',
}) })
query_url = (
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/'
'{id}?{data}'.format(id=video_id, data=data))
query_result = self._download_json( query_result = self._download_json(
'http://video.query.yahoo.com/v1/public/yql?' + data, query_url, display_id, 'Downloading video info')
display_id, 'Downloading video info')
info = query_result['query']['results']['mediaObj'][0] info = query_result['query']['results']['mediaObj'][0]
meta = info.get('meta') meta = info.get('meta')
@@ -211,7 +222,7 @@ class YahooIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': meta['title'], 'title': unescapeHTML(meta['title']),
'formats': formats, 'formats': formats,
'description': clean_html(meta['description']), 'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),

View File

@@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
HEADRequest,
get_element_by_attribute,
parse_iso8601,
)
class YesJapanIE(InfoExtractor):
    """Extractor for yesjapan.com video pages.

    Metadata comes from the page's Open Graph tags; the advertised video
    URL is then resolved with a HEAD request so the final URL is used.
    """

    _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P<slug>[A-Za-z0-9\-]*)_(?P<id>[A-Za-z0-9]+)\.html'
    _TEST = {
        'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html',
        'md5': 'f0be416314e5be21a12b499b330c21cf',
        'info_dict': {
            'id': '726497834',
            'title': 'Japanese in 5! #20 - WA And GA Particle Usages',
            'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....',
            'ext': 'mp4',
            'timestamp': 1416391590,
            'upload_date': '20141119',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        video_url = self._og_search_video_url(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The timestamp is optional: it is only looked up when the
        # 'pm-submit-data' element is present on the page.
        timestamp = None
        submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage)
        if submit_info:
            timestamp = parse_iso8601(self._search_regex(
                r'datetime="([^"]+)"', submit_info, 'upload date', fatal=False, default=None))

        # attempt to resolve the final URL in order to get a proper extension
        redirect_req = HEADRequest(video_url)
        req = self._request_webpage(
            redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False)
        if req:
            # Only replace the URL when the (non-fatal) request succeeded.
            video_url = req.geturl()

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
        }

View File

@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -418,6 +418,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'upload_date': '20140605', 'upload_date': '20140605',
}, },
}, },
# Age-gate video with encrypted signature
{
'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
'info_dict': {
'id': '6kLq3WMV1nU',
'ext': 'mp4',
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
'upload_date': '20110629',
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421) # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
{ {
'url': '__2ABJjxzNo', 'url': '__2ABJjxzNo',
@@ -478,7 +491,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _extract_signature_function(self, video_id, player_url, example_sig): def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match( id_m = re.match(
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$', r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
player_url) player_url)
if not id_m: if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url) raise ExtractorError('Cannot identify player %r' % player_url)
@@ -527,8 +540,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return 's[%s%s%s]' % (starts, ends, steps) return 's[%s%s%s]' % (starts, ends, steps)
step = None step = None
start = '(Never used)' # Quelch pyflakes warnings - start will be # Quelch pyflakes warnings - start will be set when step is set
# set as soon as step is set start = '(Never used)'
for i, prev in zip(idxs[1:], idxs[:-1]): for i, prev in zip(idxs[1:], idxs[:-1]):
if step is not None: if step is not None:
if i - prev == step: if i - prev == step:
@@ -599,24 +612,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
try: try:
sub_list = self._download_webpage( subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False) video_id, note=False)
except ExtractorError as err: except ExtractorError as err:
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err)) self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
return {} return {}
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
sub_lang_list = {} sub_lang_list = {}
for l in lang_list: for track in subs_doc.findall('track'):
lang = l[1] lang = track.attrib['lang_code']
if lang in sub_lang_list: if lang in sub_lang_list:
continue continue
params = compat_urllib_parse.urlencode({ params = compat_urllib_parse.urlencode({
'lang': lang, 'lang': lang,
'v': video_id, 'v': video_id,
'fmt': self._downloader.params.get('subtitlesformat', 'srt'), 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
'name': unescapeHTML(l[0]).encode('utf-8'), 'name': track.attrib['name'].encode('utf-8'),
}) })
url = 'https://www.youtube.com/api/timedtext?' + params url = 'https://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url sub_lang_list[lang] = url
@@ -649,10 +661,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
list_url = caption_url + '&' + list_params list_url = caption_url + '&' + list_params
caption_list = self._download_xml(list_url, video_id) caption_list = self._download_xml(list_url, video_id)
original_lang_node = caption_list.find('track') original_lang_node = caption_list.find('track')
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr': if original_lang_node is None:
self._downloader.report_warning('Video doesn\'t have automatic captions') self._downloader.report_warning('Video doesn\'t have automatic captions')
return {} return {}
original_lang = original_lang_node.attrib['lang_code'] original_lang = original_lang_node.attrib['lang_code']
caption_kind = original_lang_node.attrib.get('kind', '')
sub_lang_list = {} sub_lang_list = {}
for lang_node in caption_list.findall('target'): for lang_node in caption_list.findall('target'):
@@ -662,7 +675,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'tlang': sub_lang, 'tlang': sub_lang,
'fmt': sub_format, 'fmt': sub_format,
'ts': timestamp, 'ts': timestamp,
'kind': 'asr', 'kind': caption_kind,
}) })
sub_lang_list[sub_lang] = caption_url + '&' + params sub_lang_list[sub_lang] = caption_url + '&' + params
return sub_lang_list return sub_lang_list
@@ -723,6 +736,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format_id': format_id, 'format_id': format_id,
'url': video_url, 'url': video_url,
'width': int_or_none(r.attrib.get('width')), 'width': int_or_none(r.attrib.get('width')),
'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')), 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize, 'filesize': filesize,
@@ -733,7 +747,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
fo for fo in formats fo for fo in formats
if fo['format_id'] == format_id) if fo['format_id'] == format_id)
except StopIteration: except StopIteration:
f.update(self._formats.get(format_id, {})) f.update(self._formats.get(format_id, {}).items())
formats.append(f) formats.append(f)
else: else:
existing_format.update(f) existing_format.update(f)
@@ -766,11 +780,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
age_gate = True age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id} # We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube # this can be viewed without login into Youtube
url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'video_id': video_id, 'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'sts': self._search_regex( 'sts': self._search_regex(
r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''), r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
}) })
video_info_url = proto + '://www.youtube.com/get_video_info?' + data video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage( video_info_webpage = self._download_webpage(
@@ -968,11 +984,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif 's' in url_data: elif 's' in url_data:
encrypted_sig = url_data['s'][0] encrypted_sig = url_data['s'][0]
if not age_gate: jsplayer_url_json = self._search_regex(
jsplayer_url_json = self._search_regex( r'"assets":.+?"js":\s*("[^"]+")',
r'"assets":.+?"js":\s*("[^"]+")', embed_webpage if age_gate else video_webpage, 'JS player URL')
video_webpage, 'JS player URL') player_url = json.loads(jsplayer_url_json)
player_url = json.loads(jsplayer_url_json)
if player_url is None: if player_url is None:
player_url_json = self._search_regex( player_url_json = self._search_regex(
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
@@ -1026,6 +1041,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.report_warning( self.report_warning(
'Skipping DASH manifest: %r' % e, video_id) 'Skipping DASH manifest: %r' % e, video_id)
else: else:
# Hide the formats we found through non-DASH
dash_keys = set(df['format_id'] for df in dash_formats)
for f in formats:
if f['format_id'] in dash_keys:
f['format_id'] = 'nondash-%s' % f['format_id']
f['preference'] -= 10000
formats.extend(dash_formats) formats.extend(dash_formats)
self._sort_formats(formats) self._sort_formats(formats)
@@ -1128,6 +1149,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'info_dict': { 'info_dict': {
'title': 'JODA7', 'title': 'JODA7',
} }
}, {
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'info_dict': {
'title': 'Uploads from Interstellar Movie',
},
'playlist_mincout': 21,
}] }]
def _real_initialize(self): def _real_initialize(self):
@@ -1212,6 +1240,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'Downloading page #%s' % page_num, 'Downloading page #%s' % page_num,
transform_source=uppercase_escape) transform_source=uppercase_escape)
content_html = more['content_html'] content_html = more['content_html']
if not content_html.strip():
# Some webpages show a "Load more" button but they don't
# have more videos
break
more_widget_html = more['load_more_widget_html'] more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex( playlist_title = self._html_search_regex(
@@ -1555,9 +1587,11 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_entries = [] feed_entries = []
paging = 0 paging = 0
for i in itertools.count(1): for i in itertools.count(1):
info = self._download_json(self._FEED_TEMPLATE % paging, info = self._download_json(
'%s feed' % self._FEED_NAME, self._FEED_TEMPLATE % paging,
'Downloading page %s' % i) '%s feed' % self._FEED_NAME,
'Downloading page %s' % i,
transform_source=uppercase_escape)
feed_html = info.get('feed_html') or info.get('content_html') feed_html = info.get('feed_html') or info.get('content_html')
load_more_widget_html = info.get('load_more_widget_html') or feed_html load_more_widget_html = info.get('load_more_widget_html') or feed_html
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
@@ -1674,3 +1708,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
' or simply youtube-dl BaW_jenozKc .', ' or simply youtube-dl BaW_jenozKc .',
expected=True) expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
IE_NAME = 'youtube:truncated_id'
IE_DESC = False # Do not list
_VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
_TESTS = [{
'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
raise ExtractorError(
'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
expected=True)

View File

@@ -109,7 +109,7 @@ def parseOpts(overrideArguments=None):
kw = { kw = {
'version': __version__, 'version': __version__,
'formatter': fmt, 'formatter': fmt,
'usage': '%prog [options] url [url...]', 'usage': '%prog [OPTIONS] URL [URL...]',
'conflict_handler': 'resolve', 'conflict_handler': 'resolve',
} }

View File

@@ -8,11 +8,16 @@ from .ffmpeg import (
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
FFmpegMergerPP, FFmpegMergerPP,
FFmpegMetadataPP, FFmpegMetadataPP,
FFmpegVideoConvertor, FFmpegVideoConvertorPP,
) )
from .xattrpp import XAttrMetadataPP from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP from .execafterdownload import ExecAfterDownloadPP
def get_postprocessor(key):
return globals()[key + 'PP']
__all__ = [ __all__ = [
'AtomicParsleyPP', 'AtomicParsleyPP',
'ExecAfterDownloadPP', 'ExecAfterDownloadPP',
@@ -22,6 +27,6 @@ __all__ = [
'FFmpegMergerPP', 'FFmpegMergerPP',
'FFmpegMetadataPP', 'FFmpegMetadataPP',
'FFmpegPostProcessor', 'FFmpegPostProcessor',
'FFmpegVideoConvertor', 'FFmpegVideoConvertorPP',
'XAttrMetadataPP', 'XAttrMetadataPP',
] ]

View File

@@ -236,9 +236,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
return self._nopostoverwrites, information return self._nopostoverwrites, information
class FFmpegVideoConvertor(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferedformat=None): def __init__(self, downloader=None, preferedformat=None):
super(FFmpegVideoConvertor, self).__init__(downloader) super(FFmpegVideoConvertorPP, self).__init__(downloader)
self._preferedformat = preferedformat self._preferedformat = preferedformat
def run(self, information): def run(self, information):

View File

@@ -363,7 +363,7 @@ def encodeArgument(s):
if not isinstance(s, compat_str): if not isinstance(s, compat_str):
# Legacy code that uses byte strings # Legacy code that uses byte strings
# Uncomment the following line after fixing all post processors # Uncomment the following line after fixing all post processors
#assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
s = s.decode('ascii') s = s.decode('ascii')
return encodeFilename(s, True) return encodeFilename(s, True)
@@ -464,6 +464,13 @@ class ExtractorError(Exception):
return ''.join(traceback.format_tb(self.traceback)) return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
def __init__(self, url):
super(UnsupportedError, self).__init__(
'Unsupported URL: %s' % url, expected=True)
self.url = url
class RegexNotFoundError(ExtractorError): class RegexNotFoundError(ExtractorError):
"""Error when a regex didn't match""" """Error when a regex didn't match"""
pass pass
@@ -1262,18 +1269,25 @@ def check_executable(exe, args=[]):
def get_exe_version(exe, args=['--version'], def get_exe_version(exe, args=['--version'],
version_re=r'version\s+([0-9._-a-zA-Z]+)', version_re=None, unrecognized='present'):
unrecognized='present'):
""" Returns the version of the specified executable, """ Returns the version of the specified executable,
or False if the executable is not present """ or False if the executable is not present """
try: try:
out, err = subprocess.Popen( out, _ = subprocess.Popen(
[exe] + args, [exe] + args,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError: except OSError:
return False return False
firstline = out.partition(b'\n')[0].decode('ascii', 'ignore') if isinstance(out, bytes): # Python 2.x
m = re.search(version_re, firstline) out = out.decode('ascii', 'ignore')
return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
assert isinstance(output, compat_str)
if version_re is None:
version_re = r'version\s+([-0-9._a-zA-Z]+)'
m = re.search(version_re, output)
if m: if m:
return m.group(1) return m.group(1)
else: else:

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2014.12.13.1' __version__ = '2015.01.03'