Compare commits

..

132 Commits

Author SHA1 Message Date
Philipp Hagemeister
b3fa3917e2 release 2014.02.08 2014-02-08 16:25:03 +01:00
Sergey M.
082c6c867a [bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184) 2014-02-08 21:55:28 +07:00
Filippo Valsorda
03fcf1ab57 Merge pull request #2342 from MikeCol/tube8
[Tube8] Extended valid urls schema
2014-02-08 04:00:50 +01:00
MikeCol
3b00dea5eb Extended valid urls schema 2014-02-08 00:09:26 +01:00
Philipp Hagemeister
8bc6c8e3c0 [chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00
Sergey M.
79bc27b53a [channel9] Simplify 2014-02-07 19:41:18 +07:00
Sergey M.
84dd703199 [ivi] Simplify 2014-02-07 19:36:50 +07:00
Sergey M.
c6fdba23a6 [nfb] Add workaround for python2.6 2014-02-07 19:23:53 +07:00
Philipp Hagemeister
b19fe521a9 Merge pull request #2340 from Fnordlab/master
[chilloutzone] Fixes refactoring bug
2014-02-07 12:46:56 +01:00
Andreas Schmitz
c1e672d121 [chilloutzone] fixes bug with youtube extraction
the id used for extracting the video from youtube is stored in
native_video_id not video_id. This id is only used on chilloutzone.net
2014-02-07 12:29:58 +01:00
Andreas Schmitz
f4371f4784 Merge remote-tracking branch 'upstream/master' 2014-02-07 12:20:58 +01:00
Philipp Hagemeister
d914d9d187 [chilloutzone] Add import 2014-02-07 12:03:19 +01:00
Philipp Hagemeister
845d14d377 credit @Fnordlab for chilloutzone 2014-02-07 12:00:58 +01:00
Philipp Hagemeister
4a9540b6d2 [chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00
Philipp Hagemeister
9f31be7000 Merge remote-tracking branch 'Fnordlab/chilloutzone' 2014-02-07 11:50:26 +01:00
Philipp Hagemeister
41fa1b627d release 2014.02.06.3 2014-02-07 01:41:01 +01:00
Andreas Schmitz
c0c4e66b29 Merge branch 'chilloutzone' 2014-02-06 21:33:16 +01:00
Andreas Schmitz
cd8662de22 [chilloutzone] Bug fix, runs against tests
Fixes a bug with python3.3 and made the extractor run successfully
against tox
2014-02-06 21:31:04 +01:00
Sergey M.
3587159614 [nfb] Add encode POST data 2014-02-07 02:13:04 +07:00
Jaime Marquínez Ferrándiz
d67cc9fa7c [youtube:playlist] Recognize ‘top tracks’ urls (closes #2332)
The list parameter starts with ‘MC’ and can have more characters after it, including dots
2014-02-06 19:46:26 +01:00
Sergey M.
bf3a2fe923 [elpais] Fix typo 2014-02-07 00:38:29 +07:00
Sergey M.
e9ea0bf123 [ndr] Add support for ndr.de (Closes #2325) 2014-02-07 00:35:26 +07:00
Philipp Hagemeister
63424b6233 release 2014.02.06.2 2014-02-06 15:45:47 +01:00
Sergey M.
0bf35c5cf5 [nfb] Add support for onf.ca URLs 2014-02-06 21:41:31 +07:00
Sergey M.
95c29381eb [mooshare] Fix bogus video page URL 2014-02-06 21:26:12 +07:00
Sergey M.
94c4abce7f [nfb] Add support for nfb.ca (Closes #2069) 2014-02-06 21:19:13 +07:00
Andreas Schmitz
f2dffe55f8 Merge branch 'chilloutzone' 2014-02-06 11:49:38 +01:00
Andreas Schmitz
46a073bfac [chilloutzone] Added support for chilloutzone.net
Added support for chilloutzone.net videos including embedded youtube
and vimeo movies. In case you find a not working movie, drop me an
email.
2014-02-06 11:44:44 +01:00
Philipp Hagemeister
df872ec4e7 release 2014.02.06.1 2014-02-06 11:30:00 +01:00
Philipp Hagemeister
5de90176d9 [elpais] Add extractor 2014-02-06 11:29:46 +01:00
Philipp Hagemeister
dcf3eec47a [test_download] Skip over BadStatusLine errors
An error like https://travis-ci.org/rg3/youtube-dl/jobs/18317799#L449 is almost certainly the server's fault.
2014-02-06 04:19:57 +01:00
Philipp Hagemeister
e9e4f30d26 [pbs] Remove unused import 2014-02-06 04:19:43 +01:00
Philipp Hagemeister
83cebd73d4 [collegehumor] We only get shortened descriptions now 2014-02-06 04:16:22 +01:00
Philipp Hagemeister
1df4229bd7 [mtv/gametrailers] Change order of title preference
It looks like the plain title is better again
2014-02-06 04:15:12 +01:00
Philipp Hagemeister
3c995527e9 release 2014.02.06 2014-02-06 03:30:30 +01:00
Philipp Hagemeister
7c62b568a2 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-06 03:30:18 +01:00
Philipp Hagemeister
ccf9114e84 [googlesearch] Fix start, and skip playlists (Fixes #2329) 2014-02-06 03:29:10 +01:00
Jaime Marquínez Ferrándiz
d8061908bb [ina] Improve _VALID_URL regex (fixes #2328)
Accept all letters in upper case and don’t require anything after the id
2014-02-05 23:01:24 +01:00
Philipp Hagemeister
211e17dd43 release 2014.02.05 2014-02-05 21:23:28 +01:00
Philipp Hagemeister
6cb38a9994 [firstpost] Add extractor (Fixes #2324) 2014-02-05 21:23:21 +01:00
Sergey M.
fa7df757a7 [thisav] Simplify and use unicode literals 2014-02-05 19:13:06 +07:00
Sergey M.
8c82077619 [toutv] Use unicode literals 2014-02-05 19:02:03 +07:00
Sergey M.
e5d1f9e50a [m6] Add support for m6.fr (Closes #2313) 2014-02-05 17:38:17 +07:00
Philipp Hagemeister
7ee50ae7b5 release 2014.02.04.1 2014-02-04 23:26:55 +01:00
Jaime Marquínez Ferrándiz
de563c9da0 [ina] Simplify
Download the feed with ‘_download_xml’ to make the extraction easier
2014-02-04 23:15:36 +01:00
Jaime Marquínez Ferrándiz
50451f2a18 [vbox7] simplify 2014-02-04 23:02:53 +01:00
Jaime Marquínez Ferrándiz
9bc70948e1 [statigram] Simplify 2014-02-04 22:52:27 +01:00
Jaime Marquínez Ferrándiz
5dc733f071 [vine] Simplify 2014-02-04 22:02:15 +01:00
Jaime Marquínez Ferrándiz
bc4850908c [test/youtube_signature] Add a test with the last player
To verify it correctly handles function with “$” in their names.
2014-02-04 21:56:17 +01:00
Jaime Marquínez Ferrándiz
20650c8654 [youtube] signatures: Recognize javascript functions that contain “$” (fixes #2304) 2014-02-04 21:38:50 +01:00
Philipp Hagemeister
56dced2670 remove accidentally duplicated test file 2014-02-04 16:35:22 +01:00
Philipp Hagemeister
eef726c04b release 2014.02.04 2014-02-04 16:33:19 +01:00
Philipp Hagemeister
acf1555d76 Merge remote-tracking branch 'origin/master' 2014-02-04 16:33:06 +01:00
Philipp Hagemeister
22e7f1a6ec [pbs] Add support for article pages (Fixes #870) 2014-02-04 16:31:00 +01:00
Sergey M.
3c49325658 [lifenews] Fix video URL extraction (Closes #2302) 2014-02-04 21:31:25 +07:00
Sergey M
bb1cd2bea1 [mooshare] Add support for mooshare.biz (Closes #2149) 2014-02-04 20:53:46 +07:00
Philipp Hagemeister
fdf1f8d4ce [collegehumor] Adapt test to changed video description 2014-02-04 10:37:01 +01:00
Philipp Hagemeister
117c8c6b97 [bliptv] Remove unused imports 2014-02-04 10:25:19 +01:00
Philipp Hagemeister
5cef4ff09b [subtittles] Check that the result is not empty 2014-02-04 10:24:17 +01:00
Philipp Hagemeister
91264ce572 [iprima] Use centralized format sorting 2014-02-04 10:24:00 +01:00
Philipp Hagemeister
c79ef8e1ae Merge remote-tracking branch 'pulpe/_iprima' 2014-02-04 10:21:42 +01:00
Philipp Hagemeister
58d915df51 [traileraddict] mark as broken
traileraddict has changed their URL encoding scheme.
I'm working on restoring support, but that may take some time.
2014-02-04 10:13:52 +01:00
pulpe
7881a64499 [iprima] Add support for play.iprima.cz 2014-02-04 07:45:41 +01:00
Philipp Hagemeister
90159f5561 release 2014.02.03.1 2014-02-03 15:20:41 +01:00
Philipp Hagemeister
99877772d0 [generic] Add support for multiple brightcove URLs (Fixes #2283) 2014-02-03 15:19:40 +01:00
Sergey M.
b0268cb6ce [vimeo] Remove superfluous whitespace 2014-02-03 20:24:11 +07:00
Sergey M.
4edff4cfa8 [vimeo] Add subtitle tests 2014-02-03 20:19:23 +07:00
Sergey M.
1eac553e7e [vimeo] Add support for subtitles (Closes #2239) 2014-02-03 20:02:58 +07:00
Philipp Hagemeister
9d3ac7444d release 2014.02.03 2014-02-03 06:54:37 +01:00
Philipp Hagemeister
588128d054 Add --ignore-config option (Fixes #633) 2014-02-03 06:54:27 +01:00
Philipp Hagemeister
8e93b9b9aa Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/extractor/bliptv.py
2014-02-03 05:19:28 +01:00
Philipp Hagemeister
b4bcffefa3 [blip.tv] Add support for subtitles (#2274) 2014-02-03 05:18:30 +01:00
Filippo Valsorda
2b39af9b4f [BlipTV] Add a test case w/ subtitles (#2274) 2014-02-03 02:41:59 +01:00
Filippo Valsorda
23fe495feb Merge pull request #2274 from z00nx/master
[bliptv] Filter out SRT files
2014-02-02 17:31:57 -08:00
Sergey M
b5dbe89bba Merge branch 'master' of https://github.com/rg3/youtube-dl 2014-02-03 01:22:41 +07:00
Sergey M.
dbe80ca7ad [tinypic] Add support for tinypic.com videos (Closes #2210) 2014-02-03 01:20:03 +07:00
Jaime Marquínez Ferrándiz
009a3408f5 [cspan] Fix extraction (fixes #2291)
The webpage urls have changed.
The title and thumbnail are now extracted from an xml.
2014-02-02 18:24:20 +01:00
dst
b58e3c8918 [vube] Use 'id' and 'ext' instead of 'file' 2014-02-02 20:04:44 +07:00
Philipp Hagemeister
56b6faf91e [traileraddict] Fix extraction 2014-02-02 12:52:47 +01:00
Philipp Hagemeister
7ac1f877a7 [collegehumor] Fix test
The description simply changed, our code is working fine
2014-02-02 12:43:09 +01:00
Philipp Hagemeister
d55433bbfd Remove unused imports and simplify 2014-02-02 12:03:36 +01:00
Philipp Hagemeister
f0ce2bc1c5 Merge remote-tracking branch 'dstftw/vube' 2014-02-02 11:54:23 +01:00
Filippo Valsorda
c3bc00b90e [Normalboots] Update test video description 2014-02-02 07:17:48 +01:00
Filippo Valsorda
ff6b7b049b Merge pull request #2279 from prutz1311/master
Added support for normalboots.com (#2237)
2014-02-01 22:16:37 -08:00
dst
f46359121f [vube] Make video description optional as it may be missing 2014-02-02 12:03:55 +07:00
dst
37c1525c17 [vube] Remove unnecessary coding cookie 2014-02-02 10:49:38 +07:00
dst
c85e4cf7b4 [vube] Add support for vube.com (Closes #2285) 2014-02-02 08:33:24 +07:00
Jaime Marquínez Ferrándiz
c66dcda287 Merge pull request #2282 from dstftw/lifenews
[lifenews] Add support for lifenews.ru and fix og content extraction regex
2014-01-31 10:23:46 -08:00
dst
6d845922ab [lifenews] Fix test title 2014-02-01 01:10:15 +07:00
Oleg Prutz
2949cbe036 Update normalboots.py
fixed
2014-01-31 16:51:34 +03:00
Jaime Marquínez Ferrándiz
c3309a7774 [collegehumor] fix test description 2014-01-31 14:48:49 +01:00
Jaime Marquínez Ferrándiz
7aed837595 [ro220] Simplify and use unicode_literals 2014-01-31 14:07:58 +01:00
Jaime Marquínez Ferrándiz
0eb799bae9 [ustream] Simplify and use unicode_literals 2014-01-31 14:05:33 +01:00
Jaime Marquínez Ferrándiz
4baff4a4ae [spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00
Jaime Marquínez Ferrándiz
45d7bc2f8b [vevo] Simplify and use unicode_literals 2014-01-31 13:56:45 +01:00
Philipp Hagemeister
c0c2ddddcd Merge pull request #2281 from matthewfranglen/master
Fix #2280: Antigen now links to python script
2014-01-30 19:24:43 -08:00
Philipp Hagemeister
a96ed91610 Add tutorial for adding a new IE 2014-01-31 04:23:39 +01:00
dst
c1206423c4 Fix extraction of og content in single quotes 2014-01-31 03:57:33 +07:00
dst
659aa21ba1 [lifenews] Add support for lifenews.ru 2014-01-31 03:48:00 +07:00
Matthew Franglen
efd02e858a Fix #2280: Antigen now links to python script 2014-01-30 20:44:16 +00:00
Oleg Prutz
3bf8bc7f37 Update normalboots.py
_TEST added
2014-01-30 23:01:35 +03:00
Philipp Hagemeister
8ccda826d5 release 2014.01.30.2 2014-01-30 19:33:02 +01:00
Jaime Marquínez Ferrándiz
b9381e43c2 Fix the extraction of full-episodes urls from southpark.com (fixes #2278)
Added an additional regex to the generic _real_extract method of MTVServicesInfoExtractor
2014-01-30 19:04:33 +01:00
Jaime Marquínez Ferrándiz
fcdea2666d [collegehumor] Add support for embedded youtube videos (fixes #2277) 2014-01-30 18:33:49 +01:00
Jaime Marquínez Ferrándiz
c4db377cbb [collegehumor] The video may not contain any file in webm format (#2277)
For example http://www.collegehumor.com/video/5812266
2014-01-30 18:33:49 +01:00
Philipp Hagemeister
90dc5e8693 Merge pull request #2252 from matthewfranglen/master
Add antigen compatible plugin description
2014-01-30 09:28:10 -08:00
Oleg Prutz
c81a855b0f Added support for normalboots.com 2014-01-30 21:26:50 +04:00
Matthew Franglen
c8d8ec8567 Add requested documentation 2014-01-30 15:09:09 +00:00
z00nx 0
4f879a5be0 [bliptv] Filter out SRT files 2014-01-30 20:44:53 +11:00
Philipp Hagemeister
1a0648b4a9 [malemotion] Disable test case
I am not going to look for an alternative one, but feel free to suggest one.
2014-01-30 06:15:50 +01:00
Philipp Hagemeister
3c1b4669d0 [francetv] Use unicode_literals 2014-01-30 06:13:57 +01:00
Philipp Hagemeister
24b3d5e538 [francetvinfo.fr] Support more ID suffixes 2014-01-30 06:12:56 +01:00
Philipp Hagemeister
ab083b08ab [generic] remove testcase
The video seems to have been removed from the site.
2014-01-30 06:10:57 +01:00
Philipp Hagemeister
89acb96927 [liveleak] Support old and new URLs 2014-01-30 06:09:06 +01:00
Philipp Hagemeister
79752e18b1 release 2014.01.30.1 2014-01-30 05:33:31 +01:00
Philipp Hagemeister
55b41c723c Merge branch 'master' of github.com:rg3/youtube-dl 2014-01-30 05:30:16 +01:00
Philipp Hagemeister
9f8928d032 [generic] Match JWPlayerOptions
This adds support for The Guardian, among others
Closes #2271, fixes #2267
2014-01-30 05:29:10 +01:00
Philipp Hagemeister
3effa7ceaa Merge pull request #2273 from dstftw/crunchyroll
[crunchyroll] Add support for mobile URLs and use unicode literals
2014-01-29 20:15:38 -08:00
Philipp Hagemeister
ed9cc2f1e0 release 2014.01.30 2014-01-30 04:52:54 +01:00
Philipp Hagemeister
975fa541c2 [liveleak] Support multiple formats (Fixes #2262) 2014-01-30 04:52:50 +01:00
Jaime Marquínez Ferrándiz
251974e44c Merge pull request #2272 from dstftw/master
Improve some regexes
2014-01-29 14:58:14 -08:00
dst
38a40276ec [crunchyroll] Add support for mobile URLs and use unicode literals 2014-01-30 05:23:44 +07:00
dst
57b6288358 [comedycentral] Improve regexes 2014-01-30 04:33:00 +07:00
dst
c3f51436bf Improve some regexes for embedded players 2014-01-30 04:26:46 +07:00
Jaime Marquínez Ferrándiz
0c708f11cb [bloomberg] Fix ooyala url extraction
Added a helper method to InfoExtractor for searching the ‘twitter:player’ meta property.
Now the OoyalaIE also recognizes the ‘ec’ parameter in the url as the embed code.
2014-01-29 18:03:32 +01:00
Jaime Marquínez Ferrándiz
fb2a706d11 [myspass] Simplify and use unicode_literals 2014-01-29 16:59:22 +01:00
Jaime Marquínez Ferrándiz
0b76600deb [youjizz] Simplify and use unicode_literals 2014-01-29 16:59:21 +01:00
Jaime Marquínez Ferrándiz
245b612a36 [rbmaradio] Simplify and use unicode_literals 2014-01-29 16:59:10 +01:00
Jaime Marquínez Ferrándiz
d882161d5a [infoq] Simplify and use unicode_literals 2014-01-29 15:34:35 +01:00
Jaime Marquínez Ferrándiz
d4a21e0b49 [tutv] Simplify and use unicode_literals 2014-01-29 15:22:41 +01:00
Jaime Marquínez Ferrándiz
26a78d4bbf [nba] Simplify and use unicode_literals
Remove the commented parts for extracting the upload date
2014-01-29 15:16:18 +01:00
Matthew Franglen
d1b30713fb Add antigen compatible plugin description 2014-01-27 15:33:16 +00:00
69 changed files with 2096 additions and 738 deletions

View File

@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
from google videos for youtube-dl "large
apple". By default (with value "auto")
youtube-dl guesses.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
configuration in ~/.config/youtube-dl.conf
(%APPDATA%/youtube-dl/config.txt on
Windows)
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
# BUILD INSTRUCTIONS
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
* zip
* nosetests
### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.

View File

@@ -37,6 +37,8 @@ class TestAllURLsMatching(unittest.TestCase):
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))

View File

@@ -22,6 +22,7 @@ import socket
import youtube_dl.YoutubeDL
from youtube_dl.utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_HTTPError,
@@ -110,7 +111,7 @@ def generator(test_case):
ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise
if try_num == RETRIES:

View File

@@ -34,6 +34,8 @@ from youtube_dl.extractor import (
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
GoogleSearchIE,
GenericIE,
)
@@ -229,6 +231,24 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
def test_GoogleSearch(self):
dl = FakeYDL()
ie = GoogleSearchIE(dl)
result = ie.extract('gvsearch15:python language')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'python language')
self.assertEqual(result['title'], 'python language')
self.assertTrue(len(result['entries']) == 15)
if __name__ == '__main__':
unittest.main()

View File

@@ -10,9 +10,11 @@ from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
BlipTVIE,
YoutubeIE,
DailymotionIE,
TEDIE,
VimeoIE,
)
@@ -202,5 +204,80 @@ class TestTedSubtitles(BaseTestSubtitles):
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['en']))
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://vimeo.com/56015672'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
original_video = entries[0]
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
def test_youtube_toptracks(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
entries = result['entries']
self.assertEqual(len(entries), 100)
def test_youtube_toplist(self):
dl = FakeYDL()
ie = YoutubeTopListIE(dl)

View File

@@ -27,6 +27,12 @@ _TESTS = [
85,
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
u'js',
90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
]

24
youtube-dl.plugin.zsh Normal file
View File

@@ -0,0 +1,24 @@
# This allows the youtube-dl command to be installed in ZSH using antigen.
# Antigen is a bundle manager. It allows you to enhance the functionality of
# your zsh session by installing bundles and themes easily.
# Antigen documentation:
# http://antigen.sharats.me/
# https://github.com/zsh-users/antigen
# Install youtube-dl:
# antigen bundle rg3/youtube-dl
# Bundles installed by antigen are available for use immediately.
# Update youtube-dl (and all other antigen bundles):
# antigen update
# The antigen command will download the git repository to a folder and then
# execute an enabling script (this file). The complete process for loading the
# code is documented here:
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
# This specific script just aliases youtube-dl to the python script that this
# library provides. This requires updating the PYTHONPATH to ensure that the
# full set of code can be located.
alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"

View File

@@ -41,6 +41,7 @@ __authors__ = (
'Chris Gahan',
'Saimadhav Heblikar',
'Mike Col',
'Andreas Schmitz',
)
__license__ = 'Public Domain'
@@ -100,6 +101,43 @@ def parseOpts(overrideArguments=None):
optionf.close()
return res
def _readUserConf():
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
return userConf
def _format_option_string(option):
''' ('-o', '--option') -> -o, --format METAVAR'''
@@ -203,6 +241,11 @@ def parseOpts(overrideArguments=None):
general.add_option('--default-search',
dest='default_search', metavar='PREFIX',
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
general.add_option(
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
selection.add_option(
'--playlist-start',
@@ -457,44 +500,18 @@ def parseOpts(overrideArguments=None):
if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
commandLineConf = sys.argv[1:]
if '--ignore-config' in commandLineConf:
systemConf = []
userConf = []
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
if '--ignore-config' in systemConf:
userConf = []
else:
userConf = _readUserConf()
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')

View File

@@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
url = info_dict['url']
player_url = info_dict.get('player_url', None)
page_url = info_dict.get('page_url', None)
app = info_dict.get('app', None)
play_path = info_dict.get('play_path', None)
tc_url = info_dict.get('tc_url', None)
flash_version = info_dict.get('flash_version', None)
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn', None)
@@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if app is not None:
basic_args += ['--app', app]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if flash_version is not None:
basic_args += ['--flashVer', flash_version]
if live:
basic_args += ['--live']
if conn:

View File

@@ -15,6 +15,7 @@ from .arte import (
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
@@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
@@ -54,12 +56,14 @@ from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .elpais import ElPaisIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .firstpost import FirstpostIE
from .fktv import (
FKTVIE,
FKTVPosteckeIE,
@@ -96,6 +100,7 @@ from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import (
IviIE,
IviCompilationIE
@@ -110,12 +115,14 @@ from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
LyndaIE,
LyndaCourseIE
)
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
@@ -125,6 +132,7 @@ from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .mtv import (
MTVIE,
MTVIggyIE,
@@ -136,11 +144,14 @@ from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ndr import NDRIE
from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE
from .nfb import NFBIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .normalboots import NormalbootsIE
from .novamov import NovamovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
@@ -198,6 +209,7 @@ from .ted import TEDIE
from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
@@ -228,6 +240,7 @@ from .vimeo import (
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .wat import WatIE
from .weibo import WeiboIE
from .wimp import WimpIE

View File

@@ -0,0 +1,116 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC - iPlayer Radio'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>[\da-z]{8})'
_TEST = {
'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
'info_dict': {
'id': 'p01q7wz4',
'ext': 'flv',
'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
'duration': 1936,
},
'params': {
# rtmp download
'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
group_id = mobj.group('id')
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML')
item = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}item')
if item is None:
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % group_id
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % group_id
else:
msg = 'Episode %s is not available: %s' % (group_id, reason)
raise ExtractorError(msg, expected=True)
raise ExtractorError('Failed to extract media for episode %s' % group_id, expected=True)
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
radio_programme_id = item.get('identifier')
duration = int(item.get('duration'))
media_selection = self._download_xml(
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % radio_programme_id,
radio_programme_id, 'Downloading media selection XML')
formats = []
for media in media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media'):
bitrate = int(media.get('bitrate'))
encoding = media.get('encoding')
service = media.get('service')
connection = media.find('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
protocol = connection.get('protocol')
priority = connection.get('priority')
supplier = connection.get('supplier')
if protocol == 'http':
href = connection.get('href')
# ASX playlist
if supplier == 'asx':
asx = self._download_xml(href, radio_programme_id, 'Downloading %s ASX playlist' % service)
for i, ref in enumerate(asx.findall('./Entry/ref')):
formats.append({
'url': ref.get('href'),
'format_id': '%s_ref%s' % (service, i),
'abr': bitrate,
'acodec': encoding,
'preference': priority,
})
continue
# Direct link
formats.append({
'url': href,
'format_id': service,
'abr': bitrate,
'acodec': encoding,
'preference': priority,
})
elif protocol == 'rtmp':
application = connection.get('application', 'ondemand')
auth_string = connection.get('authString')
identifier = connection.get('identifier')
server = connection.get('server')
formats.append({
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
'play_path': identifier,
'app': '%s?%s' % (application, auth_string),
'rtmp_live': False,
'ext': 'flv',
'format_id': service,
'abr': bitrate,
'acodec': encoding,
'preference': priority,
})
self._sort_formats(formats)
return {
'id': radio_programme_id,
'title': title,
'description': description,
'duration': duration,
'formats': formats,
}

View File

@@ -1,128 +1,137 @@
from __future__ import unicode_literals
import datetime
import json
import re
import socket
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_request,
ExtractorError,
unescapeHTML,
)
class BlipTVIE(InfoExtractor):
class BlipTVIE(SubtitlesInfoExtractor):
"""Information extractor for blip.tv"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
_TEST = {
_TESTS = [{
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
'file': '5779306.mov',
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
'info_dict': {
'id': '5779306',
'ext': 'mov',
'upload_date': '20111205',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'uploader': 'Comic Book Resources - CBR TV',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
}
}
def report_direct_download(self, title):
"""Report information extraction."""
self.to_screen('%s: Direct download detected' % title)
}, {
# https://github.com/rg3/youtube-dl/pull/2274
'note': 'Video with subtitles',
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
'md5': '309f9d25b820b086ca163ffac8031806',
'info_dict': {
'id': '6586561',
'ext': 'mp4',
'uploader': 'Red vs. Blue',
'description': 'One-Zero-One',
'upload_date': '20130614',
'title': 'Red vs. Blue Season 11 Episode 1',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
presumptive_id = mobj.group('presumptive_id')
# See https://github.com/rg3/youtube-dl/issues/857
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
if embed_mobj:
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
info_page = self._download_webpage(info_url, embed_mobj.group(1))
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
video_id = self._search_regex(
r'data-episode-id="([0-9]+)', info_page, 'video_id')
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
if '?' in url:
cchar = '&'
else:
cchar = '?'
cchar = '&' if '?' in url else '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
urlh = self._request_webpage(request, None, False,
'unable to download video info webpage')
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
json_data = self._download_json(request, video_id=presumptive_id)
try:
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
formats = []
if 'additionalMedia' in data:
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
if not int(f['media_width']): # filter m3u8
continue
formats.append({
'url': f['url'],
'format_id': f['role'],
'width': int(f['media_width']),
'height': int(f['media_height']),
})
else:
video_id = compat_str(data['item_id'])
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
subtitles = {}
formats = []
if 'additionalMedia' in data:
for f in data['additionalMedia']:
if f.get('file_type_srt') == 1:
LANGS = {
'english': 'en',
}
lang = f['role'].rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles[langcode] = f['url']
continue
if not int(f['media_width']): # filter m3u8
continue
formats.append({
'url': data['media']['url'],
'width': int(data['media']['width']),
'height': int(data['media']['height']),
'url': f['url'],
'format_id': f['role'],
'width': int(f['media_width']),
'height': int(f['media_height']),
})
else:
formats.append({
'url': data['media']['url'],
'width': int(data['media']['width']),
'height': int(data['media']['height']),
})
self._sort_formats(formats)
self._sort_formats(formats)
# subtitles
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': compat_str(data['item_id']),
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'user_agent': 'iTunes/10.6.1',
'formats': formats,
}
except (ValueError, KeyError) as err:
raise ExtractorError('Unable to parse video information: %s' % repr(err))
return {
'id': video_id,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'user_agent': 'iTunes/10.6.1',
'formats': formats,
'subtitles': video_subtitles,
}
def _download_subtitle_url(self, sub_lang, url):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('Youtubedl-user-agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
class BlipTVUserIE(InfoExtractor):
"""Information Extractor for blip.tv users."""
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
@@ -131,7 +140,6 @@ class BlipTVUserIE(InfoExtractor):
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
# Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query
# page by page until there are no video ids - it means we got
@@ -142,8 +150,8 @@ class BlipTVUserIE(InfoExtractor):
while True:
url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(url, username,
'Downloading video ids from page %d' % pagenum)
page = self._download_webpage(
url, username, 'Downloading video ids from page %d' % pagenum)
# Extract video identifiers
ids_in_page = []
@@ -167,4 +175,4 @@ class BlipTVUserIE(InfoExtractor):
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return [self.playlist_result(url_entries, playlist_title = username)]
return [self.playlist_result(url_entries, playlist_title=username)]

View File

@@ -24,5 +24,5 @@ class BloombergIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
return OoyalaIE._build_url_result(ooyala_code)
ooyala_url = self._twitter_search_player(webpage)
return self.url_result(ooyala_url, OoyalaIE.ie_key())

View File

@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod
def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None
"""Try to extract the brightcove url from the webpage, returns None
if it can't be found
"""
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
return url_m.group(1)
return [url_m.group(1)]
m_brightcove = re.search(
matches = re.findall(
r'''(?sx)<object
(?:
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
if m_brightcove is not None:
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})

View File

@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
'''
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_TESTS = [
{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
'info_dict': {
'id': 'Events/TechEd/Australia/2013/KOS002',
'ext': 'mp4',
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'duration': 4576,
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
},
{
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
'info_dict': {
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
'ext': 'mp4',
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,

View File

@@ -0,0 +1,97 @@
from __future__ import unicode_literals
import re
import base64
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError
)
class ChilloutzoneIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
_TESTS = [{
'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
'md5': 'a76f3457e813ea0037e5244f509e66d1',
'info_dict': {
'id': 'enemene-meck-alle-katzen-weg',
'ext': 'mp4',
'title': 'Enemene Meck - Alle Katzen weg',
'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
},
}, {
'note': 'Video hosted at YouTube',
'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
'info_dict': {
'id': '1YVQaAgHyRU',
'ext': 'mp4',
'title': '16 Photos Taken 1 Second Before Disaster',
'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
'uploader': 'BuzzFeedVideo',
'uploader_id': 'BuzzFeedVideo',
'upload_date': '20131105',
},
}, {
'note': 'Video hosted at Vimeo',
'url': 'http://www.chilloutzone.net/video/icon-blending.html',
'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
'info_dict': {
'id': '85523671',
'ext': 'mp4',
'title': 'The Sunday Times - Icons',
'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
'uploader': 'Us',
'uploader_id': 'usfilms',
'upload_date': '20140131'
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
base64_video_info = self._html_search_regex(
r'var cozVidData = "(.+?)";', webpage, 'video data')
decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
video_info_dict = json.loads(decoded_video_info)
# get video information from dict
video_url = video_info_dict['mediaUrl']
description = clean_html(video_info_dict.get('description'))
title = video_info_dict['title']
native_platform = video_info_dict['nativePlatform']
native_video_id = video_info_dict['nativeVideoId']
source_priority = video_info_dict['sourcePriority']
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
if native_platform is None:
youtube_url = self._html_search_regex(
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
webpage, 'fallback video URL', default=None)
if youtube_url is not None:
return self.url_result(youtube_url, ie='Youtube')
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
# the own CDN
if source_priority == 'native':
if native_platform == 'youtube':
return self.url_result(native_video_id, ie='Youtube')
if native_platform == 'vimeo':
return self.url_result(
'http://vimeo.com/' + native_video_id, ie='Vimeo')
if not video_url:
raise ExtractorError('No video found')
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
'description': description,
}

View File

@@ -1,12 +1,9 @@
from __future__ import unicode_literals
import re
import string
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',

View File

@@ -4,6 +4,7 @@ import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
class CollegeHumorIE(InfoExtractor):
@@ -11,24 +12,45 @@ class CollegeHumorIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
'file': '6902724.mp4',
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
'info_dict': {
'id': '6902724',
'ext': 'mp4',
'title': 'Comic-Con Cosplay Catastrophe',
'description': 'Fans get creative this year at San Diego. Too',
'description': 'Fans get creative this year',
'age_limit': 13,
},
},
{
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'file': '3505939.mp4',
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': {
'id': '3505939',
'ext': 'mp4',
'title': 'Font Conference',
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
'description': 'This video wasn\'t long enough,',
'age_limit': 10,
'duration': 179,
},
}]
},
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950457',
'info_dict': {
'id': 'W5gMp3ZjYg4',
'ext': 'mp4',
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
'uploader': 'Funnyplox TV',
'uploader_id': 'funnyploxtv',
'description': 'md5:11812366244110c3523968aa74f02521',
'upload_date': '20140128',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -38,6 +60,12 @@ class CollegeHumorIE(InfoExtractor):
data = json.loads(self._download_webpage(
jsonUrl, video_id, 'Downloading info JSON'))
vdata = data['video']
if vdata.get('youtubeId') is not None:
return {
'_type': 'url',
'url': vdata['youtubeId'],
'ie_key': 'Youtube',
}
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
rating = vdata.get('rating')
@@ -49,7 +77,7 @@ class CollegeHumorIE(InfoExtractor):
PREFS = {'high_quality': 2, 'low_quality': 0}
formats = []
for format_key in ('mp4', 'webm'):
for qname, qurl in vdata[format_key].items():
for qname, qurl in vdata.get(format_key, {}).items():
formats.append({
'format_id': format_key + '_' + qname,
'url': qurl,
@@ -58,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
})
self._sort_formats(formats)
duration = int_or_none(vdata.get('duration'), 1000)
return {
'id': video_id,
'title': vdata['title'],
@@ -65,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
'thumbnail': vdata.get('thumbnail'),
'formats': formats,
'age_limit': age_limit,
'duration': duration,
}

View File

@@ -14,7 +14,7 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
_VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@@ -86,7 +86,7 @@ class ComedyCentralShowsIE(InfoExtractor):
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'

View File

@@ -399,7 +399,7 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
@@ -465,6 +465,10 @@ class InfoExtractor(object):
}
return RATING_TABLE.get(rating.lower(), None)
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')

View File

@@ -1,4 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
@@ -18,29 +20,29 @@ from ..aes import (
)
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
_VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
u'file': u'645513.flv',
#u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
u'info_dict': {
u'title': u'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
u'upload_date': u'20131013',
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'file': '645513.flv',
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
'info_dict': {
'title': 'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
},
u'params': {
'params': {
# rtmp
u'skip_download': True,
'skip_download': True,
},
}]
_FORMAT_IDS = {
u'360': (u'60', u'106'),
u'480': (u'61', u'106'),
u'720': (u'62', u'106'),
u'1080': (u'80', u'108'),
'360': ('60', '106'),
'480': ('61', '106'),
'720': ('62', '106'),
'1080': ('80', '108'),
}
def _decrypt_subtitles(self, data, iv, id):
@@ -63,7 +65,7 @@ class CrunchyrollIE(InfoExtractor):
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
@@ -79,93 +81,98 @@ class CrunchyrollIE(InfoExtractor):
def _convert_subtitles_to_srt(self, subtitles):
i=1
output = u''
output = ''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
start = start.replace(u'.', u',')
end = end.replace(u'.', u',')
start = start.replace('.', ',')
end = end.replace('.', ',')
text = clean_html(text)
text = text.replace(u'\\N', u'\n')
text = text.replace('\\N', '\n')
if not text:
continue
output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage_url = u'http://www.' + mobj.group('url')
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
if mobj.group('prefix') == 'm':
mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
else:
webpage_url = 'http://www.' + mobj.group('url')
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
if note_m:
raise ExtractorError(note_m)
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
video_title = re.sub(r' {2,}', u' ', video_title)
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
video_title = re.sub(r' {2,}', ' ', video_title)
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt+u'p'
streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
video_format = fmt+'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
formats.append({
u'url': video_url,
u'play_path': video_play_path,
u'ext': 'flv',
u'format': video_format,
u'format_id': video_format,
'url': video_url,
'play_path': video_play_path,
'ext': 'flv',
'format': video_format,
'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note=u'Downloading subtitles for '+sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note='Downloading subtitles for '+sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
u'id': video_id,
u'title': video_title,
u'description': video_description,
u'thumbnail': video_thumbnail,
u'uploader': video_uploader,
u'upload_date': video_upload_date,
u'subtitles': subtitles,
u'formats': formats,
'id': video_id,
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'uploader': video_uploader,
'upload_date': video_upload_date,
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -1,49 +1,60 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
find_xpath_attr,
)
class CSpanIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
IE_DESC = 'C-SPAN'
_TEST = {
'url': 'http://www.c-spanvideo.org/program/HolderonV',
'file': '315139.mp4',
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
'md5': '8e44ce11f0f725527daccc453f553eb0',
'info_dict': {
'id': '315139',
'ext': 'mp4',
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
},
'skip': 'Regularly fails on travis, for unknown reasons',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
prog_name = mobj.group('name')
webpage = self._download_webpage(url, prog_name)
video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
page_id = mobj.group('id')
webpage = self._download_webpage(url, page_id)
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
title = self._html_search_regex(
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
description = self._og_search_description(webpage)
description = self._html_search_regex(
[
# The full description
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
# If the description is small enough the other div is not
# present, otherwise this is a stripped version
r'<p class=\'initial\'>(.*?)</p>'
],
webpage, 'description', flags=re.DOTALL)
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data_json = self._download_webpage(
info_url, video_id, 'Downloading video info')
data = json.loads(data_json)
data = self._download_json(info_url, video_id)
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
video_id)
def find_string(s):
return find_xpath_attr(doc, './/string', 'name', s).text
return {
'id': video_id,
'title': title,
'title': find_string('title'),
'url': url,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'thumbnail': find_string('poster'),
}

View File

@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País'
_TEST = {
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55',
'info_dict': {
'id': 'tiempo-nuevo-recetas-viejas',
'ext': 'mp4',
'title': 'Tiempo nuevo, recetas viejas',
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex(
r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex(
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
fatal=False)
thumbnail = (
None if thumbnail_suffix is None
else prefix + thumbnail_suffix)
title = self._html_search_regex(
'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
webpage, 'title')
date_str = self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
webpage, 'upload date', fatal=False)
upload_date = (None if date_str is None else unified_strdate(date_str))
return {
'id': video_id,
'url': video_url,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': thumbnail,
'upload_date': upload_date,
}

View File

@@ -0,0 +1,38 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class FirstpostIE(InfoExtractor):
IE_NAME = 'Firstpost.com'
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
'md5': 'ee9114957692f01fb1263ed87039112a',
'info_dict': {
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<div.*?name="div_video".*?flashvars="([^"]+)">',
webpage, 'video URL')
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@@ -1,4 +1,7 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
@@ -30,7 +33,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr'
IE_NAME = 'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
# Can't use tests, videos expire in 7 days
@@ -44,17 +47,17 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetvinfo.fr'
IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
u'file': u'84981923.mp4',
u'info_dict': {
u'title': u'Soir 3',
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'file': '84981923.mp4',
'info_dict': {
'title': 'Soir 3',
},
u'params': {
u'skip_download': True,
'params': {
'skip_download': True,
},
}
@@ -62,13 +65,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
return self._extract_video(video_id)
class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = u'francetv'
IE_DESC = u'France 2, 3, 4, 5 and Ô'
IE_NAME = 'francetv'
IE_DESC = 'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
@@ -78,73 +81,73 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
_TESTS = [
# france2
{
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
u'file': u'75540104.mp4',
u'info_dict': {
u'title': u'13h15, le samedi...',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
'file': '75540104.mp4',
'info_dict': {
'title': '13h15, le samedi...',
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
u'params': {
'params': {
# m3u8 download
u'skip_download': True,
'skip_download': True,
},
},
# france3
{
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
u'info_dict': {
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
u'ext': u'flv',
u'title': u'Le scandale du prix des médicaments',
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
'info_dict': {
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
'ext': 'flv',
'title': 'Le scandale du prix des médicaments',
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
u'params': {
'params': {
# rtmp download
u'skip_download': True,
'skip_download': True,
},
},
# france4
{
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'info_dict': {
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
u'ext': u'flv',
u'title': u'Hero Corp Making of - Extrait 1',
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
'info_dict': {
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
'ext': 'flv',
'title': 'Hero Corp Making of - Extrait 1',
'description': 'md5:c87d54871b1790679aec1197e73d650a',
},
u'params': {
'params': {
# rtmp download
u'skip_download': True,
'skip_download': True,
},
},
# france5
{
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
u'info_dict': {
u'id': u'92837968',
u'ext': u'mp4',
u'title': u'C à dire ?!',
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
'info_dict': {
'id': '92837968',
'ext': 'mp4',
'title': 'C à dire ?!',
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
u'params': {
'params': {
# m3u8 download
u'skip_download': True,
'skip_download': True,
},
},
# franceo
{
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
u'info_dict': {
u'id': u'92327925',
u'ext': u'mp4',
u'title': u'Infô-Afrique',
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
'info_dict': {
'id': '92327925',
'ext': 'mp4',
'title': 'Infô-Afrique',
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
},
u'params': {
'params': {
# m3u8 download
u'skip_download': True,
'skip_download': True,
},
u'skip': u'The id changes frequently',
'skip': 'The id changes frequently',
},
]
@@ -160,26 +163,26 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
video_id = self._html_search_regex(id_res, webpage, u'video ID')
video_id = self._html_search_regex(id_res, webpage, 'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
class GenerationQuoiIE(InfoExtractor):
IE_NAME = u'france2.fr:generation-quoi'
IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
_TEST = {
u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
u'info_dict': {
u'title': u'Génération Quoi - Garde à Vous',
u'uploader': u'Génération Quoi',
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
'info_dict': {
'title': 'Génération Quoi - Garde à Vous',
'uploader': 'Génération Quoi',
},
u'params': {
'params': {
# It uses Dailymotion
u'skip_download': True,
'skip_download': True,
},
}
@@ -194,20 +197,20 @@ class GenerationQuoiIE(InfoExtractor):
class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = u'culturebox.francetvinfo.fr'
IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
u'info_dict': {
u'id': u'EV_6785',
u'ext': u'mp4',
u'title': u'Einstein on the beach au Théâtre du Châtelet',
u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
'info_dict': {
'id': 'EV_6785',
'ext': 'mp4',
'title': 'Einstein on the beach au Théâtre du Châtelet',
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
},
u'params': {
'params': {
# m3u8 download
u'skip_download': True,
'skip_download': True,
},
}
@@ -215,5 +218,5 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
return self._extract_video(video_id)

View File

@@ -38,18 +38,6 @@ class GenericIE(InfoExtractor):
'title': 'R\u00e9gis plante sa Jeep',
}
},
# embedded vimeo video
{
'add_ie': ['Vimeo'],
'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
'file': '22444065.mp4',
'md5': '2903896e23df39722c33f015af0666e2',
'info_dict': {
'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
'uploader_id': 'skillsmatter',
'uploader': 'Skills Matter',
}
},
# bandcamp page with custom domain
{
'add_ie': ['Bandcamp'],
@@ -246,15 +234,25 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
if bc_url is not None:
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_urls:
self.to_screen('Brightcove video detected.')
surl = smuggle_url(bc_url, {'Referer': url})
return self.url_result(surl, 'Brightcove')
entries = [{
'_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
} for bc_url in bc_urls]
return {
'_type': 'playlist',
'title': video_title,
'id': video_id,
'entries': entries,
}
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
if mobj:
player_url = unescapeHTML(mobj.group(1))
surl = smuggle_url(player_url, {'Referer': url})
@@ -262,7 +260,7 @@ class GenericIE(InfoExtractor):
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
@@ -332,7 +330,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group(1), 'Aparat')
# Look for MPORA videos
mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
@@ -350,7 +348,7 @@ class GenericIE(InfoExtractor):
# Look for embedded Huffington Post player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'HuffPost')
@@ -358,7 +356,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
# Look for gorilla-vid style embedding
mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage)
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import itertools
import re
@@ -8,32 +10,42 @@ from ..utils import (
class GoogleSearchIE(SearchInfoExtractor):
IE_DESC = u'Google Video search'
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
IE_DESC = 'Google Video search'
_MAX_RESULTS = 1000
IE_NAME = u'video.google:search'
IE_NAME = 'video.google:search'
_SEARCH_KEY = 'gvsearch'
def _get_n_results(self, query, n):
"""Get a specified number of results for a query"""
entries = []
res = {
'_type': 'playlist',
'id': query,
'entries': []
'title': query,
}
for pagenum in itertools.count(1):
result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
webpage = self._download_webpage(result_url, u'gvsearch:' + query,
note='Downloading result page ' + str(pagenum))
for pagenum in itertools.count():
result_url = (
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
e = {
webpage = self._download_webpage(
result_url, 'gvsearch:' + query,
note='Downloading result page ' + str(pagenum + 1))
for hit_idx, mobj in enumerate(re.finditer(
r'<h3 class="r"><a href="([^"]+)"', webpage)):
# Skip playlists
if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
continue
entries.append({
'_type': 'url',
'url': mobj.group(1)
}
res['entries'].append(e)
})
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
res['entries'] = entries[:n]
return res

View File

@@ -1,39 +1,36 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
_TEST = {
u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
u'file': u'I12055569.mp4',
u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
u'info_dict': {
u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
}
}
def _real_extract(self,url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
video_extension = 'mp4'
webpage = self._download_webpage(mrss_url, video_id)
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
info_doc = self._download_xml(mrss_url, video_id)
self.report_extraction(video_id)
video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
webpage, u'video URL')
video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
webpage, u'title')
return [{
'id': video_id,
'url': video_url,
'ext': video_extension,
'title': video_title,
}]
return {
'id': video_id,
'url': video_url,
'title': info_doc.find('.//title').text,
}

View File

@@ -1,62 +1,55 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
)
class InfoQIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
_TEST = {
u"name": u"InfoQ",
u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
u"file": u"12-jan-pythonthings.mp4",
u"info_dict": {
u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
u"title": u"A Few of My Favorite [Python] Things"
"name": "InfoQ",
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
"file": "12-jan-pythonthings.mp4",
"info_dict": {
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
"title": "A Few of My Favorite [Python] Things",
},
"params": {
"skip_download": True,
},
u"params": {
u"skip_download": True
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id=url)
self.report_extraction(url)
webpage = self._download_webpage(url, video_id)
# Extract video URL
mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract video url')
real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
video_title = self._search_regex(r'contentTitle = "(.*?)";',
webpage, u'title')
webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
webpage, u'description', fatal=False)
webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
info = {
return {
'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
'thumbnail': None,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
'description': video_description,
}
return [info]

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from random import random
from math import floor
from .common import InfoExtractor
from ..utils import compat_urllib_request
class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
_TESTS = [{
'url': 'http://play.iprima.cz/particka/particka-92',
'info_dict': {
'id': '39152',
'ext': 'flv',
'title': 'Partička (92)',
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
},
'params': {
'skip_download': True,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
webpage = self._download_webpage(url, video_id)
player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
floor(random()*1073741824),
floor(random()*1073741824))
req = compat_urllib_request.Request(player_url)
req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id)
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
if zoneGEO != '0':
base_url = base_url.replace('token', 'token_'+zoneGEO)
formats = []
for format_id in ['lq', 'hq', 'hd']:
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
if filename == 'null':
continue
real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
if format_id == 'lq':
quality = 0
elif format_id == 'hq':
quality = 1
elif format_id == 'hd':
quality = 2
filename = 'hq/'+filename
formats.append({
'format_id': format_id,
'url': base_url,
'quality': quality,
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
'rtmp_live': True,
'ext': 'flv',
})
self._sort_formats(formats)
return {
'id': real_id,
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
}

View File

@@ -14,15 +14,16 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
'url': 'http://www.ivi.ru/watch/53141',
'file': '53141.mp4',
'md5': '6ff5be2254e796ed346251d117196cf4',
'info_dict': {
'id': '53141',
'ext': 'mp4',
'title': 'Иван Васильевич меняет профессию',
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
'duration': 5498,
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
# Serial's serie
{
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
'file': '74791.mp4',
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
'info_dict': {
'id': '74791',
'ext': 'mp4',
'title': 'Дежурный ангел - 1 серия',
'duration': 2490,
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
class IviCompilationIE(InfoExtractor):
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')

View File

@@ -0,0 +1,63 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
_TEST = {
'url': 'http://lifenews.ru/news/126342',
'file': '126342.mp4',
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
'info_dict': {
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
'upload_date': '20140130',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
video_url = self._html_search_regex(
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
thumbnail = self._html_search_regex(
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
title = self._og_search_title(webpage)
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
description = self._og_search_description(webpage)
view_count = self._html_search_regex(
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
comment_count = self._html_search_regex(
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
upload_date = self._html_search_regex(
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
return {
'id': video_id,
'url': video_url,
'thumbnail': thumbnail,
'title': title,
'description': description,
'view_count': view_count,
'comment_count': comment_count,
'upload_date': unified_strdate(upload_date),
}

View File

@@ -1,16 +1,14 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
_TEST = {
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'file': '757_1364311680.mp4',
'md5': '0813c2430bea7a46bf13acf3406992f4',
@@ -19,15 +17,37 @@ class LiveLeakIE(InfoExtractor):
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
}
},
{
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'file': 'f93_1390833151.mp4',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'file: "(.*?)",', webpage, 'video URL')
sources_raw = self._search_regex(
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
if sources_raw is None:
sources_raw = '[{ %s}]' % (
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
sources = json.loads(sources_json)
formats = [{
'format_note': s.get('label'),
'url': s['file'],
} for s in sources]
self._sort_formats(formats)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
@@ -36,9 +56,8 @@ class LiveLeakIE(InfoExtractor):
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': video_title,
'description': video_description,
'uploader': video_uploader
'uploader': video_uploader,
'formats': formats,
}

View File

@@ -0,0 +1,56 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class M6IE(InfoExtractor):
IE_NAME = 'm6'
_VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
_TEST = {
'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
'md5': '242994a87de2c316891428e0176bcb77',
'info_dict': {
'id': '11323908',
'ext': 'mp4',
'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête danniversaire ! »',
'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
'duration': 100,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS')
title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
thumbnail = rss.find('./channel/item/visuel_clip_big').text
duration = int(rss.find('./channel/item/duration').text)
view_count = int(rss.find('./channel/item/nombre_vues').text)
formats = []
for format_id in ['lq', 'sd', 'hq', 'hd']:
video_url = rss.find('./channel/item/url_video_%s' % format_id)
if video_url is None:
continue
formats.append({
'url': video_url.text,
'format_id': format_id,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
}

View File

@@ -16,7 +16,8 @@ class MalemotionIE(InfoExtractor):
'info_dict': {
"title": "Bien dur",
"age_limit": 18,
}
},
'skip': 'This video has been deleted.'
}
def _real_extract(self, url):

View File

@@ -0,0 +1,114 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_request,
compat_urllib_parse,
)
class MooshareIE(InfoExtractor):
IE_NAME = 'mooshare'
IE_DESC = 'Mooshare.biz'
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
_TESTS = [
{
'url': 'http://mooshare.biz/8dqtk4bjbp8g',
'md5': '4e14f9562928aecd2e42c6f341c8feba',
'info_dict': {
'id': '8dqtk4bjbp8g',
'ext': 'mp4',
'title': 'Comedy Football 2011 - (part 1-2)',
'duration': 893,
},
},
{
'url': 'http://mooshare.biz/aipjtoc4g95j',
'info_dict': {
'id': 'aipjtoc4g95j',
'ext': 'mp4',
'title': 'Orange Caramel Dashing Through the Snow',
'duration': 212,
},
'params': {
# rtmp download
'skip_download': True,
}
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id, 'Downloading page')
if re.search(r'>Video Not Found or Deleted<', page) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
download_form = {
'op': 'download1',
'id': video_id,
'hash': hash_key,
}
request = compat_urllib_request.Request(
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self.to_screen('%s: Waiting for timeout' % video_id)
time.sleep(5)
video_page = self._download_webpage(request, video_id, 'Downloading video page')
thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
duration = int(duration_str) if duration_str is not None else None
formats = []
# SD video
mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('url'),
'format_id': 'sd',
'format': 'SD',
})
# HD video
mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('url'),
'format_id': 'hd',
'format': 'HD',
})
# rtmp video
mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
if mobj is not None:
formats.append({
'url': mobj.group('rtmpurl'),
'play_path': mobj.group('playpath'),
'rtmp_live': False,
'ext': 'mp4',
'format_id': 'rtmp',
'format': 'HD',
})
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
title_el = find_xpath_attr(
itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:video_title')
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
if title_el is None:
title_el = itemdoc.find('.//title')
if title_el.text is None:
title_el = None
if title_el is None:
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
title = title_el.text
if title is None:
raise ExtractorError('Could not find video title')
@@ -119,7 +122,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid.endswith('.swf'):
mgid = mgid[:-4]
except RegexNotFoundError:
mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
webpage, u'mgid')
return self._get_videos_info(mgid)

View File

@@ -1,3 +1,4 @@
from __future__ import unicode_literals
import os.path
from .common import InfoExtractor
@@ -11,13 +12,13 @@ from ..utils import (
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
u'file': u'11741.mp4',
u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
u'info_dict': {
u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
'file': '11741.mp4',
'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': {
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
},
}
def _real_extract(self, url):
@@ -37,12 +38,11 @@ class MySpassIE(InfoExtractor):
# extract values from metadata
url_flv_el = metadata.find('url_flv')
if url_flv_el is None:
raise ExtractorError(u'Unable to extract download url')
raise ExtractorError('Unable to extract download url')
video_url = url_flv_el.text
extension = os.path.splitext(video_url)[1][1:]
title_el = metadata.find('title')
if title_el is None:
raise ExtractorError(u'Unable to extract title')
raise ExtractorError('Unable to extract title')
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
@@ -59,13 +59,12 @@ class MySpassIE(InfoExtractor):
thumbnail = imagePreview_el.text
else:
thumbnail = None
info = {
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': extension,
'format': format,
'thumbnail': thumbnail,
'description': description
'description': description,
}
return [info]

View File

@@ -1,48 +1,39 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
u'file': u'0021200253-okc-bkn-recap.nba.mp4',
u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
u'info_dict': {
u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
u"title": u"Thunder vs. Nets"
}
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'file': u'0021200253-okc-bkn-recap.nba.mp4',
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'title': 'Thunder vs. Nets',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
# It isn't there in the HTML it returns to us
# uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
info = {
return {
'id': shortened_video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
# 'uploader_date': uploader_date,
'description': description,
}
return [info]

View File

@@ -0,0 +1,89 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NDRIE(InfoExtractor):
IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Mediathek'
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
_TESTS = [
# video
{
'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
'md5': '20eba151ff165f386643dad9c1da08f7',
'info_dict': {
'id': '19925',
'ext': 'mp4',
'title': 'Hallo Niedersachsen ',
'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
'duration': 1722,
},
},
# audio
{
'url': 'http://www.ndr.de/903/audio191719.html',
'md5': '41ed601768534dd18a9ae34d84798129',
'info_dict': {
'id': '191719',
'ext': 'mp3',
'title': '"Es war schockierend"',
'description': 'md5:ed7ff8364793545021a6355b97e95f10',
'duration': 112,
}
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id, 'Downloading page')
title = self._og_search_title(page)
description = self._og_search_description(page)
mobj = re.search(
r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
page)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
formats = []
mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
if mp3_url:
formats.append({
'url': mp3_url.group('audio'),
'format_id': 'mp3',
})
thumbnail = None
video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
if video_url:
thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
page, 'thumbnail', fatal=False)
if thumbnail:
thumbnail = 'http://www.ndr.de' + thumbnail
for format_id in ['lo', 'hi', 'hq']:
formats.append({
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
'format_id': format_id,
})
if not formats:
raise ExtractorError('No media links available for %s' % video_id)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@@ -4,18 +4,18 @@ import json
import re
from .common import InfoExtractor
from ..utils import determine_ext
class NewgroundsIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.newgrounds.com/audio/listen/549479',
'file': '549479.mp3',
'md5': 'fe6033d297591288fa1c1f780386f07a',
'info_dict': {
"title": "B7 - BusMode",
"uploader": "Burn7",
'id': '549479',
'ext': 'mp3',
'title': 'B7 - BusMode',
'uploader': 'Burn7',
}
}

View File

@@ -0,0 +1,93 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
compat_urllib_parse,
)
class NFBIE(InfoExtractor):
IE_NAME = 'nfb'
IE_DESC = 'National Film Board of Canada'
_VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
_TEST = {
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
'info_dict': {
'id': 'qallunaat_why_white_people_are_funny',
'ext': 'mp4',
'title': 'Qallunaat! Why White People Are Funny ',
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
'duration': 3128,
'uploader': 'Mark Sandiford',
'uploader_id': 'mark-sandiford',
},
'params': {
# rtmp download
'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
page, 'director id', fatal=False)
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
config = self._download_xml(request, video_id, 'Downloading player config XML')
title = None
description = None
thumbnail = None
duration = None
formats = []
def extract_thumbnail(media):
thumbnails = {}
for asset in media.findall('assets/asset'):
thumbnails[asset.get('quality')] = asset.find('default/url').text
if not thumbnails:
return None
if 'high' in thumbnails:
return thumbnails['high']
return list(thumbnails.values())[0]
for media in config.findall('./player/stream/media'):
if media.get('type') == 'posterImage':
thumbnail = extract_thumbnail(media)
elif media.get('type') == 'video':
duration = int(media.get('duration'))
title = media.find('title').text
description = media.find('description').text
# It seems assets always go from lower to better quality, so no need to sort
formats = [{
'url': x.find('default/streamerURI').text + '/',
'play_path': x.find('default/url').text,
'rtmp_live': False,
'ext': 'mp4',
'format_id': x.get('quality'),
} for x in media.findall('assets/asset')]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'formats': formats,
}

View File

@@ -0,0 +1,61 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class NormalbootsIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
_TEST = {
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
u'file': u'home-alone-games-jontron.mp4',
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
u'info_dict': {
u'title': u'Home Alone Games - JonTron - NormalBoots',
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
u'uploader': u'JonTron',
u'upload_date': u'20140125',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('videoid')
info = {
'id': video_id,
'uploader': None,
'upload_date': None,
}
if url[:4] != 'http':
url = 'http://' + url
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
video_thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
webpage, 'uploader')
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date')
video_upload_date = unified_strdate(raw_upload_date)
video_upload_date = unified_strdate(raw_upload_date)
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
player_page = self._download_webpage(player_url, video_id)
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
info['url'] = video_url
info['title'] = video_title
info['description'] = video_description
info['thumbnail'] = video_thumbnail
info['uploader'] = video_uploader
info['upload_date'] = video_upload_date
return info

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video

View File

@@ -1,34 +1,68 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
)
'''
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',
u'file': u'2365006249.mp4',
u'md5': 'ce1888486f0908d555a8093cac9a7362',
u'info_dict': {
u'title': u'A More Perfect Union',
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
u'duration': 3190,
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
'md5': 'ce1888486f0908d555a8093cac9a7362',
'info_dict': {
'id': '2365006249',
'ext': 'mp4',
'title': 'A More Perfect Union',
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
'duration': 3190,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
presumptive_id = mobj.group('presumptive_id')
display_id = presumptive_id
if presumptive_id:
webpage = self._download_webpage(url, display_id)
url = self._search_regex(
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id')
if not display_id:
display_id = player_id
if player_id:
player_page = self._download_webpage(
url, display_id, note='Downloading player page',
errnote='Could not download player page')
video_id = self._search_regex(
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
else:
video_id = mobj.group('id')
display_id = video_id
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info_page = self._download_webpage(info_url, video_id)
info =json.loads(info_page)
return {'id': video_id,
'title': info['title'],
'url': info['alternate_encoding']['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
}
info = self._download_json(info_url, display_id)
return {
'id': video_id,
'title': info['title'],
'url': info['alternate_encoding']['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
}

View File

@@ -1,10 +1,11 @@
# encoding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
ExtractorError,
)
@@ -12,16 +13,17 @@ from ..utils import (
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
_TEST = {
u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
u'info_dict': {
u"uploader_id": u"ford-lopatin",
u"location": u"Spain",
u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
u"uploader": u"Ford & Lopatin",
u"title": u"Live at Primavera Sound 2011"
}
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
'info_dict': {
'id': 'ford-lopatin-live-at-primavera-sound-2011',
'ext': 'mp3',
"uploader_id": "ford-lopatin",
"location": "Spain",
"description": "Joel Ford and Daniel Oneohtrix Point Never Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
"uploader": "Ford & Lopatin",
"title": "Live at Primavera Sound 2011",
},
}
def _real_extract(self, url):
@@ -31,26 +33,23 @@ class RBMARadioIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
webpage, u'json data', flags=re.MULTILINE)
webpage, 'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
except ValueError as e:
raise ExtractorError(u'Invalid JSON: ' + str(e))
raise ExtractorError('Invalid JSON: ' + str(e))
video_url = data['akamai_url'] + '&cbr=256'
url_parts = compat_urllib_parse_urlparse(video_url)
video_ext = url_parts.path.rpartition('.')[2]
info = {
'id': video_id,
'url': video_url,
'ext': video_ext,
'title': data['title'],
'description': data.get('teaser_text'),
'location': data.get('country_of_origin'),
'uploader': data.get('host', {}).get('name'),
'uploader_id': data.get('host', {}).get('slug'),
'thumbnail': data.get('image', {}).get('large_url_2x'),
'duration': data.get('duration'),
return {
'id': video_id,
'url': video_url,
'title': data['title'],
'description': data.get('teaser_text'),
'location': data.get('country_of_origin'),
'uploader': data.get('host', {}).get('name'),
'uploader_id': data.get('host', {}).get('slug'),
'thumbnail': data.get('image', {}).get('large_url_2x'),
'duration': data.get('duration'),
}
return [info]

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
u'file': u'LYV6doKo7f.mp4',
u'md5': u'03af18b73a07b4088753930db7a34add',
u'info_dict': {
u"title": u"Luati-le Banii sez 4 ep 1",
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
'file': 'LYV6doKo7f.mp4',
'md5': '03af18b73a07b4088753930db7a34add',
'info_dict': {
"title": "Luati-le Banii sez 4 ep 1",
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
}
}
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
webpage, u'flashVars')
webpage, 'flashVars')
flashVars = compat_parse_qs(flashVars_str)
info = {
return {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
return info

View File

@@ -1,34 +1,36 @@
import re
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
class SouthParkStudiosIE(MTVServicesInfoExtractor):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
IE_NAME = 'southparkstudios.com'
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
_TESTS = [{
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
'info_dict': {
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
'title': 'Bat Daded',
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
},
}]
class SouthparkDeIE(SouthParkStudiosIE):
IE_NAME = u'southpark.de'
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
IE_NAME = 'southpark.de'
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
u'info_dict': {
u'title': u'The Government Won\'t Respect My Privacy',
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
'info_dict': {
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
'ext': 'mp4',
'title': 'The Government Won\'t Respect My Privacy',
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
},
}]

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -6,20 +8,20 @@ from .common import InfoExtractor
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
u'file': u'1259285.mp4',
u'md5': u'2c2754212136f35fb4b19767d242f66e',
u'info_dict': {
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
}
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'file': '1259285.mp4',
'md5': '2c2754212136f35fb4b19767d242f66e',
'info_dict': {
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
},
},
{
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
u'file': u'1309159.mp4',
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
u'info_dict': {
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
}
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
'file': '1309159.mp4',
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
'info_dict': {
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
},
}]
def _real_extract(self, url):
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
r'<div class="module-title">(.*?)</div>', webpage, u'title')
r'<div class="module-title">(.*?)</div>', webpage, 'title')
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
idoc = self._download_xml(
xml_url, video_id,
note=u'Downloading XML', errnote=u'Failed to download XML')
note='Downloading XML', errnote='Failed to download XML')
formats = [
{
'format_id': n.tag.rpartition('type')[2],
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
self._sort_formats(formats)
info = {
return {
'id': video_id,
'title': video_title,
'duration': duration,
'formats': formats,
}
return info

View File

@@ -1,36 +1,38 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class StatigramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
_TEST = {
u'url': u'http://statigr.am/p/522207370455279102_24101272',
u'file': u'522207370455279102_24101272.mp4',
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
u'info_dict': {
u'uploader_id': u'aguynamedpatrick',
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
'url': 'http://statigr.am/p/522207370455279102_24101272',
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
'info_dict': {
'id': '522207370455279102_24101272',
'ext': 'mp4',
'uploader_id': 'aguynamedpatrick',
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
html_title = self._html_search_regex(
r'<title>(.+?)</title>',
webpage, u'title')
webpage, 'title')
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
uploader_id = self._html_search_regex(
r'@([^ ]+)', title, u'uploader name', fatal=False)
ext = 'mp4'
r'@([^ ]+)', title, 'uploader name', fatal=False)
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
'ext': ext,
'title': title,
return {
'id': video_id,
'url': self._og_search_video_url(webpage),
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'uploader_id' : uploader_id
}]
'uploader_id': uploader_id
}

View File

@@ -62,10 +62,13 @@ class SubtitlesInfoExtractor(InfoExtractor):
subtitles[sub_lang] = subtitle
return subtitles
def _download_subtitle_url(self, sub_lang, url):
return self._download_webpage(url, None, note=False)
def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """
try:
sub = self._download_webpage(url, None, note=False)
sub = self._download_subtitle_url(sub_lang, url)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
@@ -79,7 +82,11 @@ class SubtitlesInfoExtractor(InfoExtractor):
returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses
"""
pass
# By default, allow implementations to simply pass in the result
assert isinstance(webpage, dict), \
'_get_available_subtitles not implemented'
return webpage
def _get_available_automatic_caption(self, video_id, webpage):
"""

View File

@@ -1,22 +1,23 @@
#coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
from ..utils import determine_ext
class ThisAVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
_TEST = {
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
u"file": u"47734.flv",
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
u"info_dict": {
u"title": u"高樹マリア - Just fit",
u"uploader": u"dj7970",
u"uploader_id": u"dj7970"
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
'md5': '0480f1ef3932d901f0e0e719f188f19b',
'info_dict': {
'id': '47734',
'ext': 'flv',
'title': '高樹マリア - Just fit',
'uploader': 'dj7970',
'uploader_id': 'dj7970'
}
}
@@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, u'uploader name', fatal=False)
webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, u'uploader id', fatal=False)
webpage, 'uploader id', fatal=False)
ext = determine_ext(video_url)
return {
'_type': 'video',
'id': video_id,
'url': video_url,
'uploader': uploader,

View File

@@ -0,0 +1,50 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from youtube_dl.utils import ExtractorError
class TinyPicIE(InfoExtractor):
IE_NAME = 'tinypic'
IE_DESC = 'tinypic.com videos'
_VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
_TEST = {
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
'md5': '609b74432465364e72727ebc6203f044',
'info_dict': {
'id': '6xw7tc',
'ext': 'flv',
'title': 'shadow phenomenon weird',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id, 'Downloading page')
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
if mobj is None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
file_id = mobj.group('fileid')
server_id = mobj.group('serverid')
KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
keywords = self._html_search_meta('keywords', webpage, 'title')
title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
return {
'id': file_id,
'url': video_url,
'thumbnail': thumbnail,
'title': title
}

View File

@@ -1,4 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -9,25 +11,25 @@ from ..utils import (
class TouTvIE(InfoExtractor):
IE_NAME = u'tou.tv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
_TEST = {
u'url': u'http://www.tou.tv/30-vies/S04E41',
u'file': u'30-vies_S04E41.mp4',
u'info_dict': {
u'title': u'30 vies Saison 4 / Épisode 41',
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
u'age_limit': 8,
u'uploader': u'Groupe des Nouveaux Médias',
u'duration': 1296,
u'upload_date': u'20131118',
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
'url': 'http://www.tou.tv/30-vies/S04E41',
'file': '30-vies_S04E41.mp4',
'info_dict': {
'title': '30 vies Saison 4 / Épisode 41',
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
'age_limit': 8,
'uploader': 'Groupe des Nouveaux Médias',
'duration': 1296,
'upload_date': '20131118',
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
},
u'params': {
u'skip_download': True, # Requires rtmpdump
'params': {
'skip_download': True, # Requires rtmpdump
},
u'skip': 'Only available in Canada'
'skip': 'Only available in Canada'
}
def _real_extract(self, url):
@@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
mediaId = self._search_regex(
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
streams_doc = self._download_xml(
streams_url, video_id, note=u'Downloading stream list')
streams_url, video_id, note='Downloading stream list')
video_url = next(n.text
for n in streams_doc.findall('.//choice/url')
if u'//ad.doubleclick' not in n.text)
if '//ad.doubleclick' not in n.text)
if video_url.endswith('/Unavailable.flv'):
raise ExtractorError(
u'Access to this video is blocked from outside of Canada',
'Access to this video is blocked from outside of Canada',
expected=True)
duration_str = self._html_search_meta(
'video:duration', webpage, u'duration')
'video:duration', webpage, 'duration')
duration = int(duration_str) if duration_str else None
upload_date_str = self._html_search_meta(
'video:release_date', webpage, u'upload date')
'video:release_date', webpage, 'upload date')
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
return {

View File

@@ -1,17 +1,21 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TrailerAddictIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
_TEST = {
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
u'file': u'76184.mp4',
u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
u'info_dict': {
u"title": u"Prince Avalanche Trailer",
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
'md5': '41365557f3c8c397d091da510e73ceb4',
'info_dict': {
'id': '76184',
'ext': 'mp4',
'title': 'Prince Avalanche Trailer',
'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
}
}
@@ -22,9 +26,15 @@ class TrailerAddictIE(InfoExtractor):
title = self._search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' - Trailer Addict','')
view_count = self._search_regex(r'Views: (.+?)<br />',
webpage, 'Views Count')
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
view_count_str = self._search_regex(
r'<span class="views_n">([0-9,.]+)</span>',
webpage, 'view count', fatal=False)
view_count = (
None if view_count_str is None
else int(view_count_str.replace(',', '')))
video_id = self._search_regex(
r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
webpage, 'video id')
# Presence of (no)watchplus function indicates HD quality is available
if re.search(r'function (no)?watchplus()', webpage):
@@ -39,14 +49,16 @@ class TrailerAddictIE(InfoExtractor):
info_webpage, 'Download url').replace('%3F','?')
thumbnail_url = self._search_regex(r'&image=(.+?)&',
info_webpage, 'thumbnail url')
ext = final_url.split('.')[-1].split('?')[0]
return [{
'id' : video_id,
'url' : final_url,
'ext' : ext,
'title' : title,
'thumbnail' : thumbnail_url,
'description' : self._og_search_description(webpage),
'view_count' : view_count,
}]
description = self._html_search_regex(
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
webpage, 'description', fatal=False)
return {
'id': video_id,
'url': final_url,
'title': title,
'thumbnail': thumbnail_url,
'description': description,
'view_count': view_count,
}

View File

@@ -11,7 +11,7 @@ from ..aes import (
)
class Tube8IE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
_TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4',

View File

@@ -1,3 +1,4 @@
from __future__ import unicode_literals
import base64
import re
@@ -6,15 +7,16 @@ from ..utils import (
compat_parse_qs,
)
class TutvIE(InfoExtractor):
_VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
u'file': u'2742556.flv',
u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
u'info_dict': {
u"title": u"Noah en pabellon cuahutemoc"
}
'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
'file': '2742556.flv',
'md5': '5eb766671f69b82e528dc1e7769c5cb2',
'info_dict': {
'title': 'Noah en pabellon cuahutemoc',
},
}
def _real_extract(self, url):
@@ -22,18 +24,15 @@ class TutvIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
data = compat_parse_qs(data_content)
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
info = {
return {
'id': internal_id,
'url': video_url,
'ext': ext,
'title': self._og_search_title(webpage),
}
return [info]

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import json
import re
@@ -10,48 +12,48 @@ from ..utils import (
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
IE_NAME = u'ustream'
IE_NAME = 'ustream'
_TEST = {
u'url': u'http://www.ustream.tv/recorded/20274954',
u'file': u'20274954.flv',
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
u'info_dict': {
u"uploader": u"Young Americans for Liberty",
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
}
'url': 'http://www.ustream.tv/recorded/20274954',
'file': '20274954.flv',
'md5': '088f151799e8f572f84eb62f17d73e5c',
'info_dict': {
"uploader": "Young Americans for Liberty",
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
},
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
webpage, u'title')
webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
webpage, u'uploader', fatal=False, flags=re.DOTALL)
webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
webpage, u'thumbnail', fatal=False)
webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
'uploader': uploader,
'thumbnail': thumbnail,
}
info = {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
'uploader': uploader,
'thumbnail': thumbnail,
}
return info
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
IE_NAME = u'ustream:channel'
IE_NAME = 'ustream:channel'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View File

@@ -1,3 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -10,45 +13,44 @@ from ..utils import (
class Vbox7IE(InfoExtractor):
"""Information Extractor for Vbox7"""
_VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
_VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}
'url': 'http://vbox7.com/play:249bb972c2',
'md5': '99f65c0c9ef9b682b97313e052734c3f',
'info_dict': {
'id': '249bb972c2',
'ext': 'flv',
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
},
}
def _real_extract(self,url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
video_id = mobj.group('id')
redirect_page, urlh = self._download_webpage_handle(url, video_id)
new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
new_location = self._search_regex(r'window\.location = \'(.*)\';',
redirect_page, 'redirect location')
redirect_url = urlh.geturl() + new_location
webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
webpage = self._download_webpage(redirect_url, video_id,
'Downloading redirect page')
title = self._html_search_regex(r'<title>(.*)</title>',
webpage, u'title').split('/')[0].strip()
webpage, 'title').split('/')[0].strip()
ext = "flv"
info_url = "http://vbox7.com/play/magare.do"
data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
info_request = compat_urllib_request.Request(info_url, data)
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
if info_response is None:
raise ExtractorError(u'Unable to extract the media url')
raise ExtractorError('Unable to extract the media url')
(final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
return [{
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
return {
'id': video_id,
'url': final_url,
'ext': 'flv',
'title': title,
'thumbnail': thumbnail_url,
}]
}

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re
import json
import xml.etree.ElementTree
import datetime
@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
u'info_dict': {
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody to Die For",
u"duration": 230.12,
u"width": 1920,
u"height": 1080,
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
'file': 'GB1101300280.mp4',
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
'info_dict': {
"upload_date": "20130624",
"uploader": "Hurts",
"title": "Somebody to Die For",
"duration": 230.12,
"width": 1920,
"height": 1080,
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
if version['version'] > last_version['version']:
last_version = version
if last_version['version'] == -1:
raise ExtractorError(u'Unable to extract last version of the video')
raise ExtractorError('Unable to extract last version of the video')
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
formats = []
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
format_url = self._SMIL_BASE_URL + m.group('path')
formats.append({
'url': format_url,
'format_id': u'SMIL_' + m.group('cbr'),
'format_id': 'SMIL_' + m.group('cbr'),
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
video_info = json.loads(info_json)['video']
video_info = self._download_json(json_url, video_id)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
u'Downloading SMIL info')
'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
u'Cannot download SMIL information, falling back to JSON ..')
'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
info = {
return {
'id': video_id,
'title': video_info['title'],
'formats': formats,
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
}
return info

View File

@@ -6,10 +6,10 @@ import re
import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
clean_html,
get_element_by_attribute,
ExtractorError,
@@ -19,7 +19,7 @@ from ..utils import (
)
class VimeoIE(InfoExtractor):
class VimeoIE(SubtitlesInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
@@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
'videopassword': 'youtube-dl',
},
},
{
'url': 'http://vimeo.com/76979871',
'md5': '3363dd6ffebe3784d56f4132317fd446',
'note': 'Video with subtitles',
'info_dict': {
'id': '76979871',
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'upload_date': '20131015',
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
}
},
]
def _login(self):
@@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0:
raise ExtractorError('No known codec found')
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': video_id,
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': video_upload_date,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'upload_date': video_upload_date,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'formats': formats,
'webpage_url': url,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
'subtitles': video_subtitles,
}

View File

@@ -1,18 +1,21 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class VineIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
_TEST = {
u'url': u'https://vine.co/v/b9KOOWX7HUx',
u'file': u'b9KOOWX7HUx.mp4',
u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
u'info_dict': {
u"uploader": u"Jack Dorsey",
u"title": u"Chicken."
}
'url': 'https://vine.co/v/b9KOOWX7HUx',
'md5': '2f36fed6235b16da96ce9b4dc890940d',
'info_dict': {
'id': 'b9KOOWX7HUx',
'ext': 'mp4',
'uploader': 'Jack Dorsey',
'title': 'Chicken.',
},
}
def _real_extract(self, url):
@@ -24,17 +27,17 @@ class VineIE(InfoExtractor):
self.report_extraction(video_id)
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
webpage, u'video URL')
video_url = self._html_search_meta('twitter:player:stream', webpage,
'video URL')
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
webpage, u'uploader', fatal=False, flags=re.DOTALL)
webpage, 'uploader', fatal=False, flags=re.DOTALL)
return [{
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': uploader,
}]
'uploader': uploader,
}

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals
import re
import datetime
from .common import InfoExtractor
class VubeIE(InfoExtractor):
IE_NAME = 'vube'
IE_DESC = 'Vube.com'
_VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'
_TEST = {
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
'md5': 'f81dcf6d0448e3291f54380181695821',
'info_dict': {
'id': 'YL2qNPkqon',
'ext': 'mp4',
'title': 'Chiara Grispo - Price Tag by Jessie J',
'description': 'md5:8ea652a1f36818352428cb5134933313',
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
'uploader': 'Chiara.Grispo',
'uploader_id': '1u3hX0znhP',
'upload_date': '20140103',
'duration': 170.56
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video = self._download_json('http://vube.com/api/v2/video/%s' % video_id,
video_id, 'Downloading video JSON')
public_id = video['public_id']
formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
'height': int(fmt['height']),
'abr': int(fmt['audio_bitrate']),
'vbr': int(fmt['video_bitrate']),
'format_id': fmt['media_resolution_id']
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed']
self._sort_formats(formats)
title = video['title']
description = video.get('description')
thumbnail = video['thumbnail_src']
if thumbnail.startswith('//'):
thumbnail = 'http:' + thumbnail
uploader = video['user_alias']
uploader_id = video['user_url_id']
upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d')
duration = video['duration']
view_count = video['raw_view_count']
like_count = video['total_likes']
dislike_count= video['total_hates']
comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id,
video_id, 'Downloading video comment JSON')
comment_count = comment['total']
return {
'id': video_id,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
}

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unescapeHTML,
ExtractorError,
)

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -9,12 +11,12 @@ from ..utils import (
class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = {
u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
u'file': u'2189178.flv',
u'md5': u'07e15fa469ba384c7693fd246905547c',
u'info_dict': {
u"title": u"Zeichentrick 1",
u"age_limit": 18,
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
'file': '2189178.flv',
'md5': '07e15fa469ba384c7693fd246905547c',
'info_dict': {
"title": "Zeichentrick 1",
"age_limit": 18,
}
}
@@ -30,12 +32,12 @@ class YouJizzIE(InfoExtractor):
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
webpage, u'title').strip()
webpage, 'title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
raise ExtractorError(u'ERROR: unable to extract embed page')
raise ExtractorError('ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
@@ -47,23 +49,23 @@ class YouJizzIE(InfoExtractor):
if m_playlist is not None:
playlist_url = m_playlist.group('playlist')
playlist_page = self._download_webpage(playlist_url, video_id,
u'Downloading playlist page')
'Downloading playlist page')
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
if len(m_levels) == 0:
raise ExtractorError(u'Unable to extract video url')
raise ExtractorError('Unable to extract video url')
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
(_, video_url) = sorted(videos)[0]
video_url = video_url.replace('%252F', '%2F')
else:
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
webpage, u'video URL')
webpage, 'video URL')
info = {'id': video_id,
'url': video_url,
'title': video_title,
'ext': 'flv',
'format': 'flv',
'player_url': embed_page_url,
'age_limit': age_limit}
return [info]
return {
'id': video_id,
'url': video_url,
'title': video_title,
'ext': 'flv',
'format': 'flv',
'player_url': embed_page_url,
'age_limit': age_limit,
}

View File

@@ -502,7 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return a % b
m = re.match(
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
if m:
fname = m.group('func')
if fname not in functions:
@@ -1422,7 +1422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?:
_VALID_URL = r"""(?x)(?:
(?:https?://)?
(?:\w+\.)?
youtube\.com/
@@ -1431,7 +1431,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
\? (?:.*?&)*? (?:p|a|list)=
| p/
)
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
(
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
@@ -1441,11 +1445,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _real_initialize(self):
self._login()
@@ -1469,7 +1468,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
playlist_id = mobj.group(1) or mobj.group(2)

View File

@@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
https_request = http_request
https_response = http_response
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""
upload_date = None
#Replace commas
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
format_expressions = [
'%d %B %Y',
'%B %d %Y',
@@ -771,11 +772,12 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M',
]
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except:
except ValueError:
pass
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.01.29'
__version__ = '2014.02.08'