Compare commits

...

84 Commits

Author SHA1 Message Date
7a1e71575e release 2016.07.02 2016-07-02 02:47:42 +07:00
ac2d8f54d1 [vine] Remove superfluous whitespace 2016-07-02 02:45:00 +07:00
14ff6baa0e [fusion] Improve 2016-07-02 02:44:37 +07:00
bb08101ec4 [Fusion] Add new extractor 2016-07-02 02:37:28 +07:00
bc4b2d75ba [pornhub] Add support for thumbzilla (Closes #8696) 2016-07-02 02:11:07 +07:00
35fc3021ba [periscope] Add another fallback source 2016-07-02 01:35:57 +07:00
347227237b [periscope] fix playlist extraction (#9967)
The JSON response changed and the extractor needed to be updated in order to gather the video IDs.
2016-07-02 01:29:11 +07:00
564dc3c6e8 [vine] Fix extraction (Closes #9970) 2016-07-02 01:24:57 +07:00
9f4576a7eb [twitch] Update usher URL (Closes #9975) 2016-07-01 23:16:43 +07:00
f11315e8d4 release 2016.07.01 2016-07-01 03:59:57 +07:00
0c2ac64bb8 [sixplay] Rename preference key to quality in format dict 2016-07-01 03:57:59 +07:00
a9eede3913 [test/compat] compat_shlex_split: test with newlines 2016-07-01 03:30:35 +07:00
9e29ef13a3 [options] Accept quoted string across multiple lines (#9940)
Like:

    -f "
    bestvideo+bestaudio/
    best
    "
2016-07-01 03:30:31 +07:00
eaaaaec042 [pornhub] Add more tests with removed videos 2016-07-01 03:18:27 +07:00
3cb3b60064 [pornhub] Relax removed message regex (Closes #9964) 2016-07-01 03:14:23 +07:00
044e3d91b5 [Pornhub] Fix error detection 2016-07-01 02:59:50 +07:00
c9e538a3b1 [ctvnews] use orderedSet, increase the number of items for playlists and use smaller bin list for test 2016-06-30 19:52:32 +01:00
76dad392f5 [meta] Clarify the source of uppod st decryption algorithm 2016-06-30 18:27:57 +01:00
9617b557aa [ctv] Add new extractor(closes #4077) 2016-06-30 18:22:35 +01:00
bf4fa24414 [ctvnews] Add new extractor(closes #2156) 2016-06-30 18:22:35 +01:00
20361b4f25 [rds] extract 9c9media formats 2016-06-30 18:22:35 +01:00
05a0068a76 [9c9media] Add new extractor 2016-06-30 18:22:35 +01:00
66a42309fa release 2016.06.30 2016-06-30 23:56:55 +07:00
fd94e2671a [meta] Add support for pladform embeds 2016-06-30 23:20:44 +07:00
8ff6697861 [pladform] Improve embed detection 2016-06-30 23:19:29 +07:00
eafa643715 [meta] Make duration and description optional
For iframe URLs
2016-06-30 23:06:13 +07:00
049da7cb6c [meta] Extend _VALID_URL 2016-06-30 23:04:18 +07:00
7dbeee7e22 [generic] make twitter:player extraction non fatal 2016-06-30 14:11:55 +01:00
93ad6c6bfa [sixplay] Add new extractor(closes #2183) 2016-06-30 13:50:49 +01:00
329179073b [generic] add generic support for twitter:player embeds 2016-06-30 12:01:30 +01:00
4d86d2008e [urplay] fix typo and check with flake8 2016-06-30 11:30:42 +01:00
ab47b6e881 [theatlantic] Add new extractor(closes #6611) 2016-06-30 04:08:56 +01:00
df43389ade [skysports] Add new extractor(closes #7066) 2016-06-30 02:54:21 +01:00
397b305cfe [meta] Add new extractor(closes #8789) 2016-06-30 00:21:03 +01:00
e496fa50cd [urplay] Add new extractor(closes #9332) 2016-06-29 20:19:31 +01:00
06a96da15b [eagleplatform] Improve embed detection and extract in separate routine (Closes #9926) 2016-06-29 23:01:34 +07:00
70157c2c43 [aenetworks] add support for movie pages 2016-06-29 16:55:17 +01:00
c58ed8563d [aenetworks] extract history topic playlist title 2016-06-29 16:18:16 +01:00
4c7821227c [aenetworks:historytopic] fix topic video url 2016-06-29 16:03:32 +01:00
42362fdb5e [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) 2016-06-29 15:49:17 +01:00
97124e572d [arte:playlist] Fix test 2016-06-28 22:39:53 +07:00
32616c14cc [vrt] extract all formats 2016-06-28 14:02:03 +01:00
8174d0fe95 release 2016.06.27 2016-06-27 23:09:39 +07:00
8704778d95 [pbs] Check manually constructed http links (Closes #9921) 2016-06-27 23:06:42 +07:00
c287f2bc60 [extractor/generic] Use _extract_url for kaltura embeds (Closes #9922) 2016-06-27 22:45:26 +07:00
9ea5c04c0d [kaltura] Add _extract_url with fixed regex 2016-06-27 22:44:17 +07:00
fd7a7498a4 [test_all_urls] PEP 8 and change wording 2016-06-27 22:11:45 +07:00
e3a6747d8f New test-case: extractor names are supposed to be unique
@dstftw explained in
https://github.com/rg3/youtube-dl/pull/9918#issuecomment-228625878 that
extractor names are supposed to be unique. @dstftw has fixed the two
offending extractors, and here I add a test to ensure this does not
happen in the future.
2016-06-27 22:09:29 +07:00
f41ffc00d1 [skynewsarabia:article] Clarify IE_NAME 2016-06-27 05:08:09 +07:00
81fda15369 [sr:mediathek] Clarify IE_NAME 2016-06-27 05:07:12 +07:00
427cd050a3 [extractor/generic] Improve kaltura embed detection (Closes #9911) 2016-06-27 04:11:53 +07:00
b0c200f1ec [msn] Add test URL with non-alphanumeric characters 2016-06-26 22:03:36 +07:00
92747e664a release 2016.06.26 2016-06-26 21:15:24 +07:00
f1f336322d [msn] Fix extraction (Closes #8960, closes #9542) 2016-06-26 21:10:05 +07:00
bf8dd79045 [extractor/common] Fix sorting with custom field preference 2016-06-26 21:09:07 +07:00
c6781156aa [MSN] add new extractor 2016-06-26 21:07:59 +07:00
f484c5fa25 [vidbit] Improve (Closes #9759) 2016-06-26 16:59:28 +07:00
88d9f6c0c4 [utils] Add support for name list in _html_search_meta 2016-06-26 16:57:14 +07:00
3c9c088f9c [Vidbit] Add new extractor 2016-06-26 16:52:52 +07:00
fc3996bfe1 [iqiyi] Remove codes for debugging 2016-06-26 15:45:41 +08:00
5b6ad8630c [iqiyi] Partially fix IqiyiIE
Use the HTML5 API. Only low-resolution formats available

Related: #9839

Thanks @zhangn1985 for the overall algorithm (soimort/you-get#1224)
2016-06-26 15:18:32 +08:00
30105f4ac0 [le] Move urshift() to utils.py 2016-06-26 15:17:26 +08:00
1143535d76 [utils] Add urshift()
Used in IqiyiIE and LeIE
2016-06-26 15:16:49 +08:00
7d52c052ef [generic] Fix test_Generic_76
Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658
2016-06-26 11:56:27 +08:00
a2406fce3c Fix misspelling 2016-06-26 01:28:55 +07:00
3b34ab538c [svtplay] Extend _VALID_URL (#9900) 2016-06-26 00:29:53 +07:00
ac782306f1 [iqiyi] Mark broken 2016-06-26 00:25:41 +07:00
0c00e889f3 Credit @JakubAdamWieczorek for #9813 2016-06-25 23:35:57 +07:00
ce96ed05f4 [polskieradio] Add test with video 2016-06-25 23:31:21 +07:00
0463b77a1f [polskieradio] Improve extraction (Closes #9813) 2016-06-25 23:19:18 +07:00
2d185706ea [polskieradio] Add support for Polskie Radio.
Polskie Radio is the main Polish state-funded radio broadcasting service.
2016-06-25 23:19:18 +07:00
b72b44318c [utils] Add strip_or_none 2016-06-25 23:19:18 +07:00
46f59e89ea [utils] Add unified_timestamp 2016-06-25 23:19:18 +07:00
b4241e308e release 2016.06.25 2016-06-25 03:03:20 +07:00
3d4b08dfc7 [setup.py] Add file version information and quotes consistency (Closes #9878) 2016-06-25 02:50:12 +07:00
be49068d65 [youtube] Fix and skip some tests 2016-06-24 22:47:19 +07:00
525cedb971 [youtube] Relax URL expansion in description 2016-06-24 22:37:13 +07:00
de3c7fe0d4 [youtube] Fix 141 format tests 2016-06-24 22:27:55 +07:00
896cc72750 [mixcloud] View count and like count may be absent
Closes #9874
2016-06-24 17:26:12 +08:00
c1ff6e1ad0 [vimeo:review] Fix extraction for password-protected videos
Closes #9853
2016-06-24 16:48:37 +08:00
fee70322d7 [appletrailers] correct thumbnail fallback 2016-06-23 19:03:34 +01:00
8065d6c55f [dcn] extend _VALID_URL for awaan.ae and extract all available formats 2016-06-23 17:22:15 +01:00
494172d2e5 [appletrailers] extract info from an alternative source if available(closes #8422) 2016-06-23 15:49:42 +01:00
6e3c2047f8 [tvp] extract all formats and detect errors 2016-06-23 04:36:16 +01:00
50 changed files with 1593 additions and 583 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.02**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.23.1
[debug] youtube-dl version 2016.07.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -175,3 +175,4 @@ Tomáš Čech
Déstin Reed
Roman Tsiupa
Artur Krysiak
Jakub Adam Wieczorek

View File

@ -45,7 +45,6 @@
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:cinema**
@ -153,6 +152,8 @@
- **CSNNE**
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTV**
- **CTVNews**
- **culturebox.francetvinfo.fr**
- **CultureUnplugged**
- **CWTV**
@ -241,6 +242,7 @@
- **FreeVideo**
- **Funimation**
- **FunnyOrDie**
- **Fusion**
- **GameInformer**
- **Gamekings**
- **GameOne**
@ -273,6 +275,7 @@
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HistoricFilms**
- **history:topic**: History.com Topic
- **hitbox**
- **hitbox:live**
- **HornBunny**
@ -359,6 +362,7 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **META**
- **metacafe**
- **Metacritic**
- **Mgoon**
@ -385,6 +389,7 @@
- **MovieFap**
- **Moviezine**
- **MPORA**
- **MSN**
- **MTV**
- **mtv.de**
- **mtviggy.com**
@ -438,6 +443,7 @@
- **nick.de**
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **NineCNineMedia**
- **njoy**: N-JOY
- **njoy:embed**
- **Noco**
@ -501,8 +507,9 @@
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **PolskieRadio**
- **PornHd**
- **PornHub**
- **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist**
- **PornHubUserVideos**
- **Pornotube**
@ -586,8 +593,10 @@
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
- **Sina**
- **SixPlay**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
- **Slutload**
- **smotri**: Smotri.com
@ -619,6 +628,7 @@
- **SportBoxEmbed**
- **SportDeutschland**
- **Sportschau**
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **SSA**
@ -719,6 +729,7 @@
- **UDNEmbed**: 聯合影音
- **Unistra**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USAToday**
- **ustream**
- **ustream:channel**
@ -736,6 +747,7 @@
- **vh1.com**
- **Vice**
- **ViceShow**
- **Vidbit**
- **Viddler**
- **video.google:search**: Google Video search
- **video.mit.edu**

View File

@ -21,25 +21,37 @@ try:
import py2exe
except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print("Cannot import py2exe", file=sys.stderr)
print('Cannot import py2exe', file=sys.stderr)
exit(1)
py2exe_options = {
"bundle_files": 1,
"compressed": 1,
"optimize": 2,
"dist_dir": '.',
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
'bundle_files': 1,
'compressed': 1,
'optimize': 2,
'dist_dir': '.',
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
}
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
py2exe_console = [{
"script": "./youtube_dl/__main__.py",
"dest_base": "youtube-dl",
'script': './youtube_dl/__main__.py',
'dest_base': 'youtube-dl',
'version': __version__,
'description': DESCRIPTION,
'comments': LONG_DESCRIPTION,
'product_name': 'youtube-dl',
'product_version': __version__,
}]
py2exe_params = {
'console': py2exe_console,
'options': {"py2exe": py2exe_options},
'options': {'py2exe': py2exe_options},
'zipfile': None
}
@ -72,7 +84,7 @@ else:
params['scripts'] = ['bin/youtube-dl']
class build_lazy_extractors(Command):
description = "Build the extractor lazy loading module"
description = 'Build the extractor lazy loading module'
user_options = []
def initialize_options(self):
@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
dry_run=self.dry_run,
)
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
setup(
name='youtube_dl',
version=__version__,
description='YouTube video downloader',
long_description='Small command-line program to download videos from'
' YouTube.com and other video sites.',
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
@ -112,17 +119,17 @@ setup(
# test_requires = ['nosetest'],
classifiers=[
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"License :: Public Domain",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
'Topic :: Multimedia :: Video',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},

View File

@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
class TestIE(InfoExtractor):
@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._html_search_meta('d', html), '4')
self.assertEqual(ie._html_search_meta('e', html), '5')
self.assertEqual(ie._html_search_meta('f', html), '6')
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')

View File

@ -6,6 +6,7 @@ from __future__ import unicode_literals
import os
import sys
import unittest
import collections
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list)
for ie in self.ies:
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
for (ie_name, ie_list) in name_accu.items():
self.assertEqual(
len(ie_list), 1,
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
if __name__ == '__main__':
unittest.main()

View File

@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase):
def test_compat_shlex_split(self):
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
def test_compat_etree_fromstring(self):
xml = '''

View File

@ -60,11 +60,13 @@ from youtube_dl.utils import (
timeconvert,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
uppercase_escape,
lowercase_escape,
url_basename,
urlencode_postdata,
urshift,
update_url_query,
version_tuple,
xpath_with_ns,
@ -283,8 +285,28 @@ class TestUtil(unittest.TestCase):
'20150202')
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
self.assertEqual(
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
1417001400)
self.assertEqual(
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
1422902860)
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@ -959,5 +981,9 @@ The first line
self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
if __name__ == '__main__':
unittest.main()

View File

@ -7,18 +7,123 @@ from ..utils import (
smuggle_url,
update_url_query,
unescapeHTML,
extract_attributes,
get_element_by_attribute,
)
from ..compat import (
compat_urlparse,
)
class AENetworksIE(InfoExtractor):
class AENetworksBaseIE(InfoExtractor):
def theplatform_url_result(self, theplatform_url, video_id, query):
return {
'_type': 'url_transparent',
'id': video_id,
'url': smuggle_url(
update_url_query(theplatform_url, query),
{
'sig': {
'key': 'crazyjava',
'secret': 's3cr3t'
},
'force_smil_url': True
}),
'ie_key': 'ThePlatform',
}
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
},
'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
'info_dict': {
'id': '71889446852',
},
'playlist_mincount': 5,
}, {
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
'info_dict': {
'id': 'SERIES4317',
'title': 'Atlanta Plastic',
},
'playlist_mincount': 2,
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'only_matching': True
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
}]
def _real_extract(self, url):
show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id
webpage = self._download_webpage(url, display_id)
if show_path:
url_parts = show_path.split('/')
url_parts_len = len(url_parts)
if url_parts_len == 1:
entries = []
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
entries.append(self.url_result(
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeriesId', webpage),
self._html_search_meta('aetn:SeriesTitle', webpage))
elif url_parts_len == 2:
entries = []
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item)
episode_url = compat_urlparse.urljoin(
url, episode_attributes['data-canonical'])
entries.append(self.url_result(
episode_url, 'AENetworks',
episode_attributes['data-videoid']))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update(self.theplatform_url_result(
media_url, video_id, {
'mbr': 'true',
'assetTypes': 'medium_video_s3'
}))
return info
class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic'
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)/videos(?:/(?P<video_display_id>[^/?#]+))?'
_TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'info_dict': {
'id': 'g12m5Gyt3fdR',
'id': '40700995724',
'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
@ -31,57 +136,39 @@ class AENetworksIE(InfoExtractor):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'expected_warnings': ['JSON-LD'],
}, {
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'info_dict': {
'id': 'eg47EERs_JsZ',
'ext': 'mp4',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
'info_dict':
{
'id': 'world-war-i-history',
'title': 'World War I History',
},
'add_ie': ['ThePlatform'],
'playlist_mincount': 24,
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
'only_matching': True
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
'only_matching': True
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
}]
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
if video_display_id:
webpage = self._download_webpage(url, video_display_id)
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
release_url = unescapeHTML(release_url)
webpage = self._download_webpage(url, video_id)
video_url_re = [
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
r"media_url\s*=\s*'([^']+)'"
]
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
query = {'mbr': 'true'}
if page_type == 'shows':
query['assetTypes'] = 'medium_video_s3'
if 'switch=hds' in video_url:
query['switch'] = 'hls'
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update({
'_type': 'url_transparent',
'url': smuggle_url(
update_url_query(video_url, query),
{
'sig': {
'key': 'crazyjava',
'secret': 's3cr3t'},
'force_smil_url': True
}),
})
return info
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
'switch': 'hls'
})
else:
webpage = self._download_webpage(url, topic_id)
entries = []
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
video_attributes = extract_attributes(episode_item)
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
'switch': 'hls'
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -7,6 +7,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
)
@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
'info_dict': {
'id': 'manofsteel',
'id': '5111',
'title': 'Man of Steel',
},
'playlist': [
{
@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
'id': 'blackthorn',
},
'playlist_mincount': 2,
'expected_warnings': ['Unable to download JSON metadata'],
}, {
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
'info_dict': {
'id': '15881',
'title': 'Kung Fu Panda 3',
},
'playlist_mincount': 4,
}, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
movie = mobj.group('movie')
uploader_id = mobj.group('company')
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
film_id, fatal=False)
if film_data:
entries = []
for clip in film_data.get('clips', []):
clip_title = clip['title']
formats = []
for version, version_data in clip.get('versions', {}).items():
for size, size_data in version_data.get('sizes', {}).items():
src = size_data.get('src')
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
'formats': formats,
'title': clip_title,
'thumbnail': clip.get('screen') or clip.get('thumb'),
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
'upload_date': unified_strdate(clip.get('posted')),
'uploader_id': uploader_id,
})
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):

View File

@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'info_dict': {
'id': 'PL-013263',
'title': 'Areva & Uramin',
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
},
'playlist_mincount': 6,
}, {

View File

@ -749,10 +749,12 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if not isinstance(name, (list, tuple)):
name = [name]
if display_name is None:
display_name = name
display_name = name[0]
return self._html_search_regex(
self._meta_regex(name),
[self._meta_regex(n) for n in name],
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
@ -876,7 +878,11 @@ class InfoExtractor(object):
f['ext'] = determine_ext(f['url'])
if isinstance(field_preference, (list, tuple)):
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
return tuple(
f.get(field)
if f.get(field) is not None
else ('' if field == 'format_id' else -1)
for field in field_preference)
preference = f.get('preference')
if preference is None:

View File

@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
},
'expected_warnings': ['HTTP Error 404'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:ctv_web:%s' % video_id,
'ie_key': 'NineCNineMedia',
}

View File

@ -0,0 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
'upload_date': '20160630',
}
}, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
'info_dict':
{
'id': '1.2966224',
},
'playlist_mincount': 19,
}, {
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
'info_dict':
{
'id': '1.2876780',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True,
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}]
def _real_extract(self, url):
page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id):
return {
'_type': 'url_transparent',
'id': clip_id,
'url': '9c9media:ctvnews_web:%s' % clip_id,
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit():
return ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000,
})
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
return self.playlist_result(entries, page_id)

View File

@ -20,7 +20,7 @@ from ..utils import (
class DCNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url):
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
'is_live': is_live,
}
def _extract_video_formats(self, webpage, video_id, entry_protocol):
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
formats = []
m3u8_url = self._html_search_regex(
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
rtsp_url = self._search_regex(
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
if rtsp_url:
formats.append({
'url': rtsp_url,
'format_id': 'rtsp',
})
format_url_base = 'http' + self._html_search_regex(
[
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
r'<a[^>]+href="rtsp(://[^"]+)"'
], webpage, 'format url')
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# format_url_base + '/manifest.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_m3u8_formats(
format_url_base + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
format_url_base + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return formats
class DCNVideoIE(DCNBaseIE):
IE_NAME = 'dcn:video'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
'info_dict':
{
@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
# m3u8 download
'skip_download': True,
},
}
}, {
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
class DCNLiveIE(DCNBaseIE):
IE_NAME = 'dcn:live'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
def _real_extract(self, url):
channel_id = self._match_id(url)
@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
class DCNSeasonIE(InfoExtractor):
IE_NAME = 'dcn:season'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_TEST = {
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
'info_dict':

View File

@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
'skip': 'Georestricted',
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
webpage)
if mobj is not None:
return mobj.group('url')
@staticmethod
def _handle_error(response):
status = int_or_none(response.get('status', 200))

View File

@ -20,7 +20,10 @@ from .adobetv import (
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
from .aenetworks import AENetworksIE
from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
from .afreecatv import AfreecaTVIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
@ -168,6 +171,8 @@ from .crunchyroll import (
)
from .cspan import CSpanIE
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .cwtv import CWTVIE
from .dailymail import DailyMailIE
@ -276,6 +281,7 @@ from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
@ -422,6 +428,7 @@ from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
@ -454,6 +461,7 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
from .msn import MSNIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@ -521,6 +529,7 @@ from .nick import (
NickDeIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
@ -606,6 +615,7 @@ from .pluralsight import (
PluralsightCourseIE,
)
from .podomatic import PodomaticIE
from .polskieradio import PolskieRadioIE
from .porn91 import Porn91IE
from .pornhd import PornHdIE
from .pornhub import (
@ -704,10 +714,12 @@ from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
from .skysports import SkySportsIE
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
from .smotri import (
@ -889,6 +901,7 @@ from .udn import UDNEmbedIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .urplay import URPlayIE
from .usatoday import USATodayIE
from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import (
@ -915,6 +928,7 @@ from .vice import (
ViceIE,
ViceShowIE,
)
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE

View File

@ -0,0 +1,35 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class FusionIE(InfoExtractor):
    """Extractor for fusion.net videos.

    The page itself only hosts an Ooyala embed, so extraction is delegated
    to the Ooyala extractor via its embed code.
    """
    _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
        'info_dict': {
            'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
            'ext': 'mp4',
            'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
            'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
            'duration': 140.0,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://fusion.net/video/201781',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The Ooyala embed code is carried in a data-video-id attribute.
        embed_code = self._search_regex(
            r'data-video-id=(["\'])(?P<code>.+?)\1',
            webpage, 'ooyala code', group='code')

        return OoyalaIE._build_url_result(embed_code)

View File

@ -64,6 +64,8 @@ from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
class GenericIE(InfoExtractor):
@ -920,6 +922,24 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
{
# Kaltura embedded via quoted entry_id
'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
'info_dict': {
'id': '0_utuok90b',
'ext': 'mp4',
'title': '06_matthew_brender_raj_dutt',
'timestamp': 1466638791,
'upload_date': '20160622',
},
'add_ie': ['Kaltura'],
'expected_warnings': [
'Could not send HEAD request'
],
'params': {
'skip_download': True,
}
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@ -1091,12 +1111,17 @@ class GenericIE(InfoExtractor):
# Dailymotion Cloud video
{
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
'md5': '49444254273501a64675a7e68c502681',
'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
'info_dict': {
'id': '5585de919473990de4bee11b',
'id': 'x2uy8t3',
'ext': 'mp4',
'title': 'Le débat',
'title': 'Sauvons les abeilles ! - Le débat',
'description': 'md5:d9082128b1c5277987825d684939ca26',
'thumbnail': 're:^https?://.*\.jpe?g$',
'timestamp': 1434970506,
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
}
},
# OnionStudios embed
@ -1220,6 +1245,22 @@ class GenericIE(InfoExtractor):
'uploader': 'www.hudl.com',
},
},
# twitter:player embed
{
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
'md5': 'a3e0df96369831de324f0778e126653c',
'info_dict': {
'id': '4909620399001',
'ext': 'mp4',
'title': 'What Do Black Holes Sound Like?',
'description': 'what do black holes sound like',
'upload_date': '20160524',
'uploader_id': '29913724001',
'timestamp': 1464107587,
'uploader': 'TheAtlantic',
},
'add_ie': ['BrightcoveLegacy'],
}
]
def report_following_redirect(self, new_url):
@ -1903,18 +1944,14 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'EaglePlatform')
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search(
@ -2060,6 +2097,11 @@ class GenericIE(InfoExtractor):
'uploader': video_uploader,
}
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url:
return self.url_result(embed_url)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True

View File

@ -1,30 +1,25 @@
# coding: utf-8
from __future__ import unicode_literals
import binascii
import hashlib
import itertools
import math
import os
import random
import re
import time
import uuid
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
)
from ..utils import (
decode_packed_codes,
ExtractorError,
intlist_to_bytes,
ohdave_rsa_encrypt,
remove_start,
sanitized_Request,
urlencode_postdata,
url_basename,
urshift,
)
@ -171,70 +166,21 @@ class IqiyiIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
'md5': '2cb594dc2781e6c941a110d8f358118b',
'md5': '470a6c160618577166db1a7aac5a3606',
'info_dict': {
'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
'ext': 'mp4',
'title': '美国德州空中惊现奇异云团 酷似UFO',
'ext': 'f4v',
}
}, {
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc',
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb',
'title': '名侦探柯南第752集',
},
'playlist': [{
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}],
'params': {
'skip_download': True,
'ext': 'mp4',
'title': '名侦探柯南 国语版',
},
'skip': 'Geo-restricted to China',
}, {
'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
'only_matching': True,
@ -287,13 +233,6 @@ class IqiyiIE(InfoExtractor):
('10', 'h1'),
]
AUTH_API_ERRORS = {
# No preview available (不允许试看鉴权失败)
'Q00505': 'This video requires a VIP account',
# End of preview time (试看结束鉴权失败)
'Q00506': 'Needs a VIP account for full video',
}
def _real_initialize(self):
self._login()
@ -352,177 +291,101 @@ class IqiyiIE(InfoExtractor):
return True
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
auth_params = {
# version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
'version': '2.0',
'platform': 'b6c13e26323c537d',
'aid': tvid,
@staticmethod
def _gen_sc(tvid, timestamp):
    """Generate the 'sc' request-signing token for iQiyi's tmts API.

    This appears to reimplement an MD5-style digest over a message built
    from the first 13 characters of str(timestamp - 7), two fixed hex
    strings and the tvid: M holds MD5's initialization vector as signed
    32-bit ints (0x67452301, 0xEFCDAB89 and their bitwise complements),
    I_table matches MD5's per-round shift amounts, and the math.sin()
    expression reproduces MD5's T constants.  The returned string is
    every second hex digit of the resulting digest.
    """
    M = [1732584193, -271733879]
    M.extend([~M[0], ~M[1]])
    I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21]
    # Pre-seeded message words; the non-zero tail (8388608, 432) looks
    # like padding/length words -- TODO confirm against the player JS.
    C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432]

    def L(n, t):
        # 32-bit wrapping addition; t may be None for out-of-range words.
        if t is None:
            t = 0
        return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1))

    def trunc(n):
        # Reduce n to a signed 32-bit value (JS-number semantics).
        n = n % 0x100000000
        if n > 0x7fffffff:
            n -= 0x100000000
        return n

    def transform(string, mod):
        # NOTE: reads the enclosing loop variables i and a (late binding),
        # so it is only meaningful inside the packing loops below.
        num = int(string, 16)
        return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3)

    C = list(C_base)
    o = list(M)
    k = str(timestamp - 7)
    # Pack 13 characters of the adjusted timestamp into the message words.
    for i in range(13):
        a = i
        C[a >> 2] |= ord(k[a]) << 8 * (a % 4)
    # Mix in two obfuscated constant hex strings (one byte-reversed).
    for i in range(16):
        a = i + 13
        start = (i >> 2) * 8
        r = '03967743b643f66763d623d637e30733'
        C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7)
    for i in range(16):
        a = i + 29
        start = (i >> 2) * 8
        r = '7038766939776a32776a32706b337139'
        C[a >> 2] |= transform(r[start:start + 8], 1)
    # Pack up to 9 characters of the tvid.
    for i in range(9):
        a = i + 45
        if i < len(tvid):
            C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4)
    # Single 64-step MD5-style compression over C.
    for a in range(64):
        i = a
        I = i >> 4
        # urshift(a, 6) is always 0 here since a < 64.
        C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6)
        m = L(L(o[0], [
            trunc(o[1] & o[2]) | trunc(~o[1] & o[3]),
            trunc(o[3] & o[1]) | trunc(~o[3] & o[2]),
            o[1] ^ o[2] ^ o[3],
            o[2] ^ trunc(o[1] | ~o[3])
        ][I]), L(
            trunc(int(abs(math.sin(i + 1)) * 4294967296)),
            C[C_index] if C_index < len(C) else None))
        I = I_table[4 * I + i % 4]
        o = [o[3],
             L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))),
             o[1],
             o[2]]
    new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])]
    # Split the four state words into 32 nibbles (hex digits of the digest).
    s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)]
    # Keep only the odd-position hex digits.
    return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii')
def get_raw_data(self, tvid, video_id):
tm = int(time.time() * 1000)
sc = self._gen_sc(tvid, tm)
params = {
'platForm': 'h5',
'rate': 1,
'tvid': tvid,
'uid': '',
'deviceId': _uuid,
'playType': 'main', # XXX: always main?
'filename': os.path.splitext(url_basename(api_video_url))[0],
}
qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
for key, val in qd_items.items():
auth_params[key] = val[0]
auth_req = sanitized_Request(
'http://api.vip.iqiyi.com/services/ckn.action',
urlencode_postdata(auth_params))
# iQiyi server throws HTTP 405 error without the following header
auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
auth_result = self._download_json(
auth_req, video_id,
note='Downloading video authentication JSON',
errnote='Unable to download video authentication JSON')
code = auth_result.get('code')
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
if code == 'Q00506':
if do_report_warning:
self.report_warning(msg)
return False
if 'data' not in auth_result:
if msg is not None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
raise ExtractorError('Unexpected error from Iqiyi auth API')
return auth_result['data']
def construct_video_urls(self, data, video_id, _uuid, tvid):
def do_xor(x, y):
a = y % 3
if a == 1:
return x ^ 121
if a == 2:
return x ^ 72
return x ^ 103
def get_encode_code(l):
a = 0
b = l.split('-')
c = len(b)
s = ''
for i in range(c - 1, -1, -1):
a = do_xor(int(b[c - i - 1], 16), i)
s += chr(a)
return s[::-1]
def get_path_key(x, format_id, segment_index):
mg = ')(*&^flash@#$%a'
tm = self._download_json(
'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
)['t']
t = str(int(math.floor(int(tm) / (600.0))))
return md5_text(t + mg + x)
video_urls_dict = {}
need_vip_warning_report = True
for format_item in data['vp']['tkl'][0]['vs']:
if 0 < int(format_item['bid']) <= 10:
format_id = self.get_format(format_item['bid'])
else:
continue
video_urls = []
video_urls_info = format_item['fs']
if not format_item['fs'][0]['l'].startswith('/'):
t = get_encode_code(format_item['fs'][0]['l'])
if t.endswith('mp4'):
video_urls_info = format_item['flvs']
for segment_index, segment in enumerate(video_urls_info):
vl = segment['l']
if not vl.startswith('/'):
vl = get_encode_code(vl)
is_vip_video = '/vip/' in vl
filesize = segment['b']
base_url = data['vp']['du'].split('/')
if not is_vip_video:
key = get_path_key(
vl.split('/')[-1].split('.')[0], format_id, segment_index)
base_url.insert(-1, key)
base_url = '/'.join(base_url)
param = {
'su': _uuid,
'qyid': uuid.uuid4().hex,
'client': '',
'z': '',
'bt': '',
'ct': '',
'tn': str(int(time.time()))
}
api_video_url = base_url + vl
if is_vip_video:
api_video_url = api_video_url.replace('.f4v', '.hml')
auth_result = self._authenticate_vip_video(
api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
if auth_result is False:
need_vip_warning_report = False
break
param.update({
't': auth_result['t'],
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
'cid': 'afbe8fd3d73448c9',
'vid': video_id,
'QY00001': auth_result['u'],
})
api_video_url += '?' if '?' not in api_video_url else '&'
api_video_url += compat_urllib_parse_urlencode(param)
js = self._download_json(
api_video_url, video_id,
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
video_url = js['l']
video_urls.append(
(video_url, filesize))
video_urls_dict[format_id] = video_urls
return video_urls_dict
def get_format(self, bid):
    """Return the format id mapped to the given bid, or None if unknown."""
    for mapped_bid, mapped_format_id in self._FORMATS_MAP:
        if mapped_bid == str(bid):
            return mapped_format_id
    return None
def get_bid(self, format_id):
    """Return the bid mapped to the given format id, or None if unknown."""
    for mapped_bid, mapped_format_id in self._FORMATS_MAP:
        if mapped_format_id == format_id:
            return mapped_bid
    return None
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
tm = str(int(time.time()))
tail = tm + tvid
param = {
'key': 'fvip',
'src': md5_text('youtube-dl'),
'tvId': tvid,
'vid': video_id,
'vinfo': 1,
'tm': tm,
'enc': md5_text(enc_key + tail),
'qyid': _uuid,
'tn': random.random(),
# In iQiyi's flash player, um is set to 1 if there's a logged user
# Some 1080P formats are only available with a logged user.
# Here force um=1 to trick the iQiyi server
'um': 1,
'authkey': md5_text(md5_text('') + tail),
'k_tag': 1,
'cupid': 'qc_100001_100186',
'type': 'mp4',
'nolimit': 0,
'agenttype': 13,
'src': 'd846d0c32d664d32b6b54ea48997a589',
'sc': sc,
't': tm - 7,
'__jsT': None,
}
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
compat_urllib_parse_urlencode(param)
raw_data = self._download_json(api_url, video_id)
return raw_data
def get_enc_key(self, video_id):
# TODO: automatic key extraction
# last update at 2016-01-22 for Zombie::bite
enc_key = '4a1caba4b4465345366f28da7c117d20'
return enc_key
headers = {}
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
headers['Ytdl-request-proxy'] = cn_verification_proxy
return self._download_json(
'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
query=params, headers=headers)
def _extract_playlist(self, webpage):
PAGE_SIZE = 50
@ -571,58 +434,27 @@ class IqiyiIE(InfoExtractor):
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
video_id = self._search_regex(
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
_uuid = uuid.uuid4().hex
enc_key = self.get_enc_key(video_id)
for _ in range(5):
raw_data = self.get_raw_data(tvid, video_id)
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
if raw_data['code'] != 'A00000':
if raw_data['code'] == 'A00111':
self.raise_geo_restricted()
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
if raw_data['code'] != 'A000000':
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
data = raw_data['data']
data = raw_data['data']
# iQiYi sometimes returns Ads
if not isinstance(data['playInfo'], dict):
self._sleep(5, video_id)
continue
title = data['vi']['vn']
title = data['playInfo']['an']
break
# generate video_urls_dict
video_urls_dict = self.construct_video_urls(
data, video_id, _uuid, tvid)
# construct info
entries = []
for format_id in video_urls_dict:
video_urls = video_urls_dict[format_id]
for i, video_url_info in enumerate(video_urls):
if len(entries) < i + 1:
entries.append({'formats': []})
entries[i]['formats'].append(
{
'url': video_url_info[0],
'filesize': video_url_info[-1],
'format_id': format_id,
'preference': int(self.get_bid(format_id))
}
)
for i in range(len(entries)):
self._sort_formats(entries[i]['formats'])
entries[i].update(
{
'id': '%s_part%d' % (video_id, i + 1),
'title': title,
}
)
if len(entries) > 1:
info = {
'_type': 'multi_video',
'id': video_id,
'title': title,
'entries': entries,
}
else:
info = entries[0]
info['id'] = video_id
info['title'] = title
return info
return {
'id': video_id,
'title': title,
'url': data['m3u'],
}

View File

@ -64,6 +64,32 @@ class KalturaIE(InfoExtractor):
}
]
@staticmethod
def _extract_url(webpage):
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
''', webpage))
if mobj:
return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict()
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
params = actions[0]
if len(actions) > 1:

View File

@ -23,6 +23,7 @@ from ..utils import (
sanitized_Request,
str_or_none,
url_basename,
urshift,
)
@ -74,15 +75,11 @@ class LeIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2):
_loc3_ = 0
while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
param1 = urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1
return param1

View File

@ -1,8 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -23,34 +21,5 @@ class M6IE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS')
title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
thumbnail = rss.find('./channel/item/visuel_clip_big').text
duration = int(rss.find('./channel/item/duration').text)
view_count = int(rss.find('./channel/item/nombre_vues').text)
formats = []
for format_id in ['lq', 'sd', 'hq', 'hd']:
video_url = rss.find('./channel/item/url_video_%s' % format_id)
if video_url is None:
continue
formats.append({
'url': video_url.text,
'format_id': format_id,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
}
video_id = self._match_id(url)
return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)

View File

@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
unescapeHTML,
int_or_none,
ExtractorError,
)
class METAIE(InfoExtractor):
    """Extractor for video.meta.ua pages.

    Pages either carry an uppod player config (decrypted below) or embed a
    third-party player (YouTube via the decrypted file URL, or Pladform).
    """
    _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://video.meta.ua/5502115.video',
        'md5': '71b6f3ee274bef16f1ab410f7f56b476',
        'info_dict': {
            'id': '5502115',
            'ext': 'mp4',
            'title': 'Sony Xperia Z camera test [HQ]',
            'description': 'Xperia Z shoots video in FullHD HDR.',
            'uploader_id': 'nomobile',
            'uploader': 'CHЁZA.TV',
            'upload_date': '20130211',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'http://video.meta.ua/iframe/5502115',
        'only_matching': True,
    }, {
        # pladform embed
        'url': 'http://video.meta.ua/7121015.video',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        st_html5 = self._search_regex(
            r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
        if not st_html5:
            # No uppod config on the page -- fall back to a Pladform embed.
            pladform_url = PladformIE._extract_url(webpage)
            if pladform_url:
                return self.url_result(pladform_url)
            return None

        # uppod st decryption algorithm is reverse engineered from
        # function un(s) at uppod.js: every 3-character group is an HTML
        # numeric character reference with its leading '0' stripped.
        json_str = ''.join(
            '&#x0%s;' % st_html5[i:i + 3]
            for i in range(0, len(st_html5), 3))
        uppod_data = self._parse_json(unescapeHTML(json_str), video_id)

        error = uppod_data.get('customnotfound')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        video_url = uppod_data['file']
        info = {
            'id': video_id,
            'url': video_url,
            'title': uppod_data.get('comment') or self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
            'duration': int_or_none(self._og_search_property(
                'video:duration', webpage, default=None)),
        }
        if 'youtube.com/' in video_url:
            # The decrypted file URL may itself be a YouTube link.
            info.update({
                '_type': 'url_transparent',
                'ie_key': 'Youtube',
            })
        return info

View File

@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
description = self._og_search_description(webpage)
like_count = parse_count(self._search_regex(
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
webpage, 'like count', fatal=False))
webpage, 'like count', default=None))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
webpage, 'play count', default=None))
return {
'id': track_id,

122
youtube_dl/extractor/msn.py Normal file
View File

@ -0,0 +1,122 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
unescapeHTML,
)
class MSNIE(InfoExtractor):
    """Extractor for msn.com video pages.

    The final URL path component is '<two-letter prefix>-<video id>'
    (vp/ar/vi/... as seen in the tests); video metadata is embedded in the
    page as an HTML-escaped JSON blob in a data-metadata attribute.
    """
    _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
        'md5': '8442f66c116cbab1ff7098f986983458',
        'info_dict': {
            'id': 'BBqQYNE',
            'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
            'ext': 'mp4',
            'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
            'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
            'duration': 104,
            'uploader': 'CBS Entertainment',
            'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
        },
    }, {
        'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
        'only_matching': True,
    }, {
        'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
        'only_matching': True,
    }, {
        # geo restricted
        'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
        'only_matching': True,
    }, {
        'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-raped-woman-comment/vi-AAhvzW6',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, display_id = mobj.group('id', 'display_id')

        webpage = self._download_webpage(url, display_id)

        # Metadata is an HTML-escaped JSON blob; default to '{}' so a
        # missing attribute yields an empty dict rather than a regex error.
        video = self._parse_json(
            self._search_regex(
                r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
                webpage, 'video data', default='{}', group='data'),
            display_id, transform_source=unescapeHTML)

        if not video:
            # No metadata blob -- surface the page's own error message.
            error = unescapeHTML(self._search_regex(
                r'data-error=(["\'])(?P<error>.+?)\1',
                webpage, 'error', group='error'))
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        title = video['title']

        formats = []
        for file_ in video.get('videoFiles', []):
            format_url = file_.get('url')
            if not format_url:
                continue
            ext = determine_ext(format_url)
            # .ism is not yet supported (see
            # https://github.com/rg3/youtube-dl/issues/8118)
            if ext == 'ism':
                continue
            if 'm3u8' in format_url:
                # m3u8_native should not be used here until
                # https://github.com/rg3/youtube-dl/issues/9913 is fixed
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, display_id, 'mp4',
                    m3u8_id='hls', fatal=False)
                # Despite metadata in m3u8 all video+audio formats are
                # actually video-only (no audio)
                for f in m3u8_formats:
                    if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
                        f['acodec'] = 'none'
                formats.extend(m3u8_formats)
            else:
                formats.append({
                    'url': format_url,
                    'ext': 'mp4',
                    'format_id': 'http',
                    'width': int_or_none(file_.get('width')),
                    'height': int_or_none(file_.get('height')),
                })
        self._sort_formats(formats)

        subtitles = {}
        for file_ in video.get('files', []):
            format_url = file_.get('url')
            format_code = file_.get('formatCode')
            if not format_url or not format_code:
                continue
            # Format code 3100 is treated as a subtitle/caption track here;
            # NOTE(review): meaning inferred from this usage only -- confirm.
            if compat_str(format_code) == '3100':
                subtitles.setdefault(file_.get('culture', 'en'), []).append({
                    'ext': determine_ext(format_url, 'ttml'),
                    'url': format_url,
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('headlineImage', {}).get('url'),
            'duration': int_or_none(video.get('durationSecs')),
            'uploader': video.get('sourceFriendly'),
            'uploader_id': video.get('providerId'),
            'creator': video.get('creator'),
            'subtitles': subtitles,
            'formats': formats,
        }

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
parse_duration,
ExtractorError
)
class NineCNineMediaIE(InfoExtractor):
    """Extractor for 9c9media content, addressed via internal
    '9c9media:<destination_code>:<content_id>' URLs used by site extractors."""
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'

    def _real_extract(self, url):
        destination_code, content_id = re.match(self._VALID_URL, url).groups()
        api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, content_id)

        content = self._download_json(api_base_url, content_id, query={
            '$include': '[contentpackages]',
        })
        title = content['Name']

        packages = content['ContentPackages']
        if len(packages) > 1:
            # Only single-package contents are supported for now.
            raise ExtractorError('multiple content packages')
        stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % packages[0]['Id']

        stacks = self._download_json(stacks_base_url, content_id)['Items']
        if len(stacks) > 1:
            raise ExtractorError('multiple stacks')
        stack_base_url = '%s%s/manifest.' % (stacks_base_url, stacks[0]['Id'])

        # The same manifest base serves HLS, HDS and progressive variants.
        formats = self._extract_m3u8_formats(
            stack_base_url + 'm3u8', content_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        formats.extend(self._extract_f4m_formats(
            stack_base_url + 'f4m', content_id,
            f4m_id='hds', fatal=False))
        mp4_url = self._download_webpage(stack_base_url + 'pd', content_id, fatal=False)
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'format_id': 'mp4',
            })
        self._sort_formats(formats)

        return {
            'id': content_id,
            'title': title,
            'description': content.get('Desc') or content.get('ShortDesc'),
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'duration': parse_duration(content.get('BroadcastTime')),
            'formats': formats,
        }

View File

@ -516,9 +516,14 @@ class PBSIE(InfoExtractor):
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
continue
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
# This may produce invalid links sometimes (e.g.
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
continue
f = m3u8_format.copy()
f.update({
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
'url': f_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})

View File

@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor):
title = user.get('display_name') or user.get('username')
description = user.get('description')
broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
data_store.get('BroadcastCache', {}).get('broadcastIds', []))
entries = [
self.url_result(
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
for broadcast_id in broadcast_ids]
return self.playlist_result(entries, user_id, title, description)

View File

@ -49,7 +49,7 @@ class PladformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
if mobj:
return mobj.group('url')

View File

@ -0,0 +1,95 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
int_or_none,
strip_or_none,
unified_timestamp,
)
class PolskieRadioIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
        'info_dict': {
            'id': '1587943',
            'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
            'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
        },
        'playlist': [{
            'md5': '2984ee6ce9046d91fc233bc1a864a09a',
            'info_dict': {
                'id': '1540576',
                'ext': 'mp3',
                'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
                'timestamp': 1456594200,
                'upload_date': '20160227',
                'duration': 2364,
            },
        }],
    }, {
        'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
        'info_dict': {
            'id': '1635803',
            'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
            'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
        'only_matching': True,
    }, {
        # with mp4 video
        'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract all media embedded in a Polskie Radio article as a playlist."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        content = self._search_regex(
            r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
            webpage, 'content')

        # Shared publication timestamp for all media in the article.
        timestamp = unified_timestamp(self._html_search_regex(
            r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
            webpage, 'timestamp', fatal=False))

        entries = []

        media_urls = set()

        for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
            media = self._parse_json(data_media, playlist_id, fatal=False)
            # _parse_json with fatal=False returns None on malformed JSON;
            # guard before attribute access to avoid AttributeError.
            if not media or not media.get('file') or not media.get('desc'):
                continue
            media_url = self._proto_relative_url(media['file'], 'http:')
            # The same file may be referenced multiple times in the article.
            if media_url in media_urls:
                continue
            media_urls.add(media_url)
            entries.append({
                'id': compat_str(media['id']),
                'url': media_url,
                'title': compat_urllib_parse_unquote(media['desc']),
                'duration': int_or_none(media.get('length')),
                # 'audio' provider entries carry no video stream.
                'vcodec': 'none' if media.get('provider') == 'audio' else None,
                'timestamp': timestamp,
            })

        title = self._og_search_title(webpage).strip()
        description = strip_or_none(self._og_search_description(webpage))

        return self.playlist_result(entries, playlist_id, title, description)

View File

@ -25,7 +25,15 @@ from ..aes import (
class PornHubIE(InfoExtractor):
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
(?:
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[0-9a-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': '1e19b41231a02eba417839222ac9d58e',
@ -63,8 +71,20 @@ class PornHubIE(InfoExtractor):
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
}, {
# removed at the request of cam4.com
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
'only_matching': True,
}, {
# removed at the request of the copyright owner
'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
'only_matching': True,
}, {
# removed by uploader
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
'only_matching': True,
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}]
@classmethod
@ -87,8 +107,8 @@ class PornHubIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex(
r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
webpage, 'error message', default=None)
r'(?s)<div[^>]+class=(["\']).*?\bremoved\b.*?\1[^>]*>(?P<error>.+?)</div>',
webpage, 'error message', default=None, group='error')
if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg)
raise ExtractorError(

View File

@ -1,23 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
js_to_json,
)
from ..compat import compat_str
class RDSIE(InfoExtractor):
IE_DESC = 'RDS.ca'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
_TESTS = [{
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
'info_dict': {
'id': '3.1132799',
'id': '604333',
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
'ext': 'mp4',
'title': 'Fowler Jr. prend la direction de Jacksonville',
@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
# TODO: extract f4m from 9c9media.com
video_url = self._search_regex(
r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
webpage, 'video url')
title = self._og_search_title(webpage) or self._html_search_meta(
item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
video_id = compat_str(item['id'])
title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
'title', webpage, 'title', fatal=True)
description = self._og_search_description(webpage) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
[r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
webpage, 'thumbnail', fatal=False)
@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
age_limit = self._family_friendly_search(webpage)
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
'url': video_url,
'url': '9c9media:rds_web:%s' % video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'age_limit': age_limit,
'ie_key': 'NineCNineMedia',
}

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
qualities,
int_or_none,
)
class SixPlayIE(InfoExtractor):
    # Accepts public 6play.fr watch URLs and internal 6play:<id> references.
    _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
        'md5': '42310bffe4ba3982db112b9cd3467328',
        'info_dict': {
            'id': '11495320',
            'ext': 'mp4',
            'title': 'Jamel et ses amis au Marrakech du rire 2015',
            'description': 'md5:ba2149d5c321d5201b78070ee839d872',
        },
    }

    def _real_extract(self, url):
        """Fetch the M6 player config JSON and build formats from its sources."""
        video_id = self._match_id(url)

        clip_data = self._download_json(
            'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
            video_id)
        video_data = clip_data['videoInfo']

        # Rank progressive MP4s by their advertised quality label.
        quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
        formats = []
        for source in clip_data['sources']:
            source_type, source_url = source.get('type'), source.get('src')
            # 'hls/primetime' sources are skipped — presumably a protected
            # variant not usable here; TODO confirm.
            if not source_url or source_type == 'hls/primetime':
                continue
            if source_type == 'application/vnd.apple.mpegURL':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                # An HDS manifest is expected at the same path with .f4m.
                formats.extend(self._extract_f4m_formats(
                    source_url.replace('.m3u8', '.f4m'),
                    video_id, f4m_id='hds', fatal=False))
            elif source_type == 'video/mp4':
                quality = source.get('quality')
                formats.append({
                    'url': source_url,
                    'format_id': quality,
                    'quality': quality_key(quality),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['title'].strip(),
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'series': video_data.get('titlePgm'),
            'formats': formats,
        }

View File

@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
IE_NAME = 'skynewsarabia:video'
IE_NAME = 'skynewsarabia:article'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',

View File

@ -0,0 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class SkySportsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
        'md5': 'c44a1db29f27daf9a0003e010af82100',
        'info_dict': {
            'id': '10328419',
            'ext': 'flv',
            'title': 'Bale: Its our time to shine',
            'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
        },
        'add_ie': ['Ooyala'],
    }

    def _real_extract(self, url):
        """Delegate playback to the Ooyala embed found on the watch page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        ooyala_id = self._search_regex(
            r'data-video-id="([^"]+)"', webpage, 'ooyala id')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'ooyala:%s' % ooyala_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'ie_key': 'Ooyala',
        }

View File

@ -9,6 +9,7 @@ from ..utils import (
class SRMediathekIE(ARDMediathekIE):
IE_NAME = 'sr:mediathek'
IE_DESC = 'Saarländischer Rundfunk'
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'

View File

@ -120,7 +120,7 @@ class SVTIE(SVTBaseIE):
class SVTPlayIE(SVTBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@ -141,6 +141,9 @@ class SVTPlayIE(SVTBaseIE):
# geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
'only_matching': True,
}, {
'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -4,6 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
ExtractorError,
)
class TVPIE(InfoExtractor):
@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'md5': 'c3b15ed1af288131115ff17a17c19dda',
'md5': 'b0005b542e5b4de643a9690326ab1257',
'info_dict': {
'id': '17916176',
'ext': 'mp4',
@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
error_massage = get_element_by_attribute('class', 'msg error', webpage)
if error_massage:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, clean_html(error_massage)), expected=True)
title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
webpage, 'title', group='title')
@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
if not video_url:
r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
'formats', group='url', default=None)
if not video_url or 'material_niedostepny.mp4' in video_url:
video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url']
ext = video_url.rsplit('.', 1)[-1]
if ext != 'ism/manifest':
if '/' in ext:
ext = 'mp4'
formats = []
video_url_base = self._search_regex(
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
video_url, 'video base url', default=None)
if video_url_base:
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))
m3u8_formats = self._extract_m3u8_formats(
video_url_base + '.ism/video.m3u8', video_id,
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
formats.extend(m3u8_formats)
for i, m3u8_format in enumerate(m3u8_formats, 2):
http_url = '%s-%d.mp4' % (video_url_base, i)
if self._is_valid_url(http_url, video_id):
f = m3u8_format.copy()
f.update({
'url': http_url,
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
formats = [{
'format_id': 'direct',
'url': video_url,
'ext': ext,
'ext': determine_ext(video_url, 'mp4'),
}]
else:
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)

View File

@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'http://www.twitch.tv/login'
_NETRC_MACHINE = 'twitch'

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class URPlayIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
        'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
        'info_dict': {
            'id': '190031',
            'ext': 'mp4',
            'title': 'Tripp, Trapp, Träd : Sovkudde',
            'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
        }
    }

    def _real_extract(self, url):
        """Extract RTMP/HDS/HLS formats and subtitles for a UR Play program."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        urplayer_data = self._parse_json(self._search_regex(
            r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
        # The streaming host is assigned dynamically by UR's load balancer.
        host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']

        formats = []
        # Two quality tiers: SD (no suffix) and HD ('_hd' suffix), HD preferred.
        for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
            file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
            if file_rtmp:
                formats.append({
                    'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
                    'format_id': quality + '-rtmp',
                    'ext': 'flv',
                    'preference': preference,
                })
            file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
            if file_http:
                file_http_base_url = 'http://%s/%s' % (host, file_http)
                formats.extend(self._extract_f4m_formats(
                    file_http_base_url + 'manifest.f4m', video_id,
                    preference, '%s-hds' % quality, fatal=False))
                formats.extend(self._extract_m3u8_formats(
                    file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
                    'm3u8_native', preference, '%s-hls' % quality, fatal=False))
        self._sort_formats(formats)

        subtitles = {}
        for subtitle in urplayer_data.get('subtitles', []):
            subtitle_url = subtitle.get('file')
            kind = subtitle.get('kind')
            # Fixed inverted condition: the previous check skipped entries
            # *with* a URL, so subtitles were never collected. Keep only
            # entries that have a URL and are captions (or have no kind).
            if not subtitle_url or (kind and kind != 'captions'):
                continue
            subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
                'url': subtitle_url,
            })

        return {
            'id': video_id,
            'title': urplayer_data['title'],
            'description': self._og_search_description(webpage),
            'thumbnail': urplayer_data.get('image'),
            'series': urplayer_data.get('series_title'),
            'subtitles': subtitles,
            'formats': formats,
        }

View File

@ -0,0 +1,84 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
remove_end,
unified_strdate,
)
class VidbitIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
        'md5': '1a34b7f14defe3b8fafca9796892924d',
        'info_dict': {
            'id': 'jkL2yDOEq2',
            'ext': 'mp4',
            'title': 'Intro to VidBit',
            'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
            'thumbnail': 're:https?://.*\.jpg$',
            'upload_date': '20160618',
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a VidBit video, normalizing embed URLs to the watch page."""
        video_id = self._match_id(url)

        # Always fetch the /watch page — presumably embed pages lack the
        # metadata scraped below; TODO confirm.
        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)

        video_url, title = [None] * 2

        # Primary source: the JWPlayer .setup({...}) config, if present.
        config = self._parse_json(self._search_regex(
            r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
            video_id, transform_source=js_to_json)
        if config:
            if config.get('file'):
                video_url = compat_urlparse.urljoin(url, config['file'])
            title = config.get('title')

        # Fallback: scrape a bare file: '...' assignment from the page.
        if not video_url:
            video_url = compat_urlparse.urljoin(url, self._search_regex(
                r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video URL', group='url'))

        # Fallback title: <h1>/<title>/og:title, dropping the site suffix.
        if not title:
            title = remove_end(
                self._html_search_regex(
                    (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
                    webpage, 'title', default=None) or self._og_search_title(webpage),
                ' - VidBit')

        description = self._html_search_meta(
            ('description', 'og:description', 'twitter:description'),
            webpage, 'description')

        upload_date = unified_strdate(self._html_search_meta(
            'datePublished', webpage, 'upload date'))

        view_count = int_or_none(self._search_regex(
            r'<strong>(\d+)</strong> views',
            webpage, 'view count', fatal=False))
        comment_count = int_or_none(self._search_regex(
            r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
            webpage, 'comment count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
        }

View File

@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
if mobj:
return mobj.group(1)
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword')
if password is None:
@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
}
}, {
'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
'info_dict': {
'id': '138823582',
'ext': 'mp4',
'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
'uploader': 'TMB',
'uploader_id': 'user37284429',
},
'params': {
'videopassword': 'holygrail',
},
}]
def _real_initialize(self):
self._login()
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
config_url = self._html_search_regex(
r'data-config-url="([^"]+)"', webpage, 'config URL',
default=NO_DEFAULT if video_password_verified else None)
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
webpage_url, video_id, video_password_verified=True)
return config_url
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
'https://player.vimeo.com/video/%s/config' % video_id, video_id)
config_url = self._get_config_url(url, video_id)
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id

View File

@ -90,10 +90,12 @@ class VineIE(InfoExtractor):
data = self._parse_json(
self._search_regex(
r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id,
r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
webpage, 'vine data'),
video_id)
data = data[list(data.keys())[0]]
formats = [{
'format_id': '%(format)s-%(rate)s' % f,
'vcodec': f.get('format'),

View File

@ -25,7 +25,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1414271750.949,
'upload_date': '20141025',
'duration': 929,
}
},
'skip': 'HTTP Error 404: Not Found',
},
# sporza.be
{
@ -39,7 +40,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1413835980.560,
'upload_date': '20141020',
'duration': 3238,
}
},
'skip': 'HTTP Error 404: Not Found',
},
# cobra.be
{
@ -53,16 +55,39 @@ class VRTIE(InfoExtractor):
'timestamp': 1413967500.494,
'upload_date': '20141022',
'duration': 661,
}
},
'skip': 'HTTP Error 404: Not Found',
},
{
# YouTube video
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
'only_matching': True,
'md5': 'b8b93da1df1cea6c8556255a796b7d61',
'info_dict': {
'id': 'Wji-BZ0oCwg',
'ext': 'mp4',
'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
'description': 'md5:8e468944dce15567a786a67f74262583',
'uploader': 'Star Wars',
'uploader_id': 'starwars',
'upload_date': '20160407',
},
'add_ie': ['Youtube'],
},
{
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
'only_matching': True,
'md5': '',
'info_dict': {
'id': '2377055',
'ext': 'mp4',
'title': 'Cafe Derby',
'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
'upload_date': '20150626',
'timestamp': 1435305240.769,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
]
@ -98,6 +123,32 @@ class VRTIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
src.replace('playlist.m3u8', 'manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'data-video-geoblocking="true"' not in webpage:
rtmp_formats = self._extract_smil_formats(
src.replace('playlist.m3u8', 'jwplayer.smil'),
video_id, fatal=False)
formats.extend(rtmp_formats)
for rtmp_format in rtmp_formats:
rtmp_format_c = rtmp_format.copy()
rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtmp_format_c['play_path']
del rtmp_format_c['ext']
http_format = rtmp_format_c.copy()
http_format.update({
'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
'protocol': 'http',
})
rtsp_format = rtmp_format_c.copy()
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([http_format, rtsp_format])
else:
formats.extend(self._extract_f4m_formats(
'%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))

View File

@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'youtube_include_dash_manifest': True,
'format': '141',
},
'skip': 'format 141 not served anymore',
},
# DASH manifest with encrypted signature
{
@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# JS player signature function name containing $
@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# Controversy video
@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:33',
'formats': 'mincount:32',
},
},
# DASH manifest with segment_list
@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'youtube_include_dash_manifest': True,
'format': '135', # bestvideo
}
},
'skip': 'This live event has ended.',
},
{
# Multifeed videos (multiple cameras), URL is for Main Camera
@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
},
'playlist_count': 2,
'skip': 'Not multifeed anymore',
},
{
'url': 'http://vid.plus/FlRa-iH7PGw',
@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This video does not exist.',
},
{
# Video licensed under Creative Commons
@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
''', r'\1', video_description)

View File

@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None):
except IOError:
return default # silently skip if file is not present
try:
res = []
for l in optionf:
res += compat_shlex_split(l, comments=True)
res = compat_shlex_split(optionf.read(), comments=True)
finally:
optionf.close()
return res

View File

@ -76,7 +76,7 @@ class Socks4Error(ProxyError):
CODES = {
91: 'request rejected or failed',
92: 'request rejected becasue SOCKS server cannot connect to identd on the client',
92: 'request rejected because SOCKS server cannot connect to identd on the client',
93: 'request rejected because the client program and identd report different user-ids'
}

View File

@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
# strptime() patterns tried, in order, by the date-parsing helpers below.
# Base set: formats that are unambiguous regardless of day/month ordering.
DATE_FORMATS = (
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
# Ordinal day suffixes ("1st", "2nd", "4th") with a 12-hour clock time.
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
# ISO 8601 variants (any trailing Z/offset is stripped beforehand by
# extract_timezone, but the literal-Z forms are kept for direct matches).
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
)
# Extension preferring day-before-month, e.g. 02/07/2016 == 2 July 2016.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
# Extension preferring month-before-day (US style), e.g. 02/07/2016 == Feb 7.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
def preferredencoding():
"""Get preferred encoding.
@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
https_response = http_response
def extract_timezone(date_str):
    """Split a trailing timezone designator off a date string.

    Recognizes a trailing 'Z' or a '+HH:MM' / '-HHMM' style offset (the
    leading '^.{8,}?' guard keeps the pattern from eating part of a short
    date itself). Returns a (timezone, date_str) pair where timezone is a
    datetime.timedelta (zero for 'Z' or when no designator is found) and
    date_str has the designator removed.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        # No timezone designator at all: leave the string untouched.
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(tz_match.group('tz'))]
    sign = tz_match.group('sign')
    if not sign:
        # Bare 'Z' suffix: UTC, i.e. a zero offset.
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
date_str = re.sub(r'\.[0-9]+', '', date_str)
if timezone is None:
m = re.search(
r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group(0))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
timezone, date_str = extract_timezone(date_str)
try:
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone
@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
pass
def date_formats(day_first=True):
    """Return the strptime patterns to try, picked by day/month preference.

    With day_first=True (the default) the day-before-month variants are
    included; otherwise the US-style month-before-day variants are.
    """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
"""Return a string with the date in the format YYYYMMDD"""
@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True):
upload_date = None
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
_, date_str = extract_timezone(date_str)
format_expressions = [
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
if day_first:
format_expressions.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
else:
format_expressions.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
for expression in format_expressions:
for expression in date_formats(day_first):
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError:
@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True):
return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Return a POSIX (UTC) timestamp for *date_str*, or None if unparseable.

    day_first controls whether ambiguous numeric dates are read as
    day-before-month (see date_formats()). A trailing timezone designator
    and an AM/PM marker are stripped before matching; 'PM' shifts the
    result forward twelve hours.
    """
    if date_str is None:
        return None

    # Commas never carry meaning in the supported formats.
    date_str = date_str.replace(',', ' ')

    # 'PM' on a 12-hour clock means twelve hours past the parsed time.
    # NOTE(review): this also shifts '12:xx PM' by 12h — presumably inputs
    # use 1-11 hour values with AM/PM; confirm against callers.
    pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + trailing timezone abbreviation (e.g. ' PM CEST').
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Last resort: RFC 2822 style dates. parsedate_tz() returns a plain
    # 10-tuple (or None), which has no timetuple() method — the previous
    # `timetuple.timetuple()` raised AttributeError whenever this fallback
    # was reached. calendar.timegm() accepts the tuple directly (it only
    # reads the leading time fields).
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple)
def determine_ext(url, default_ext='unknown_video'):
if url is None:
return default_ext
@ -1626,6 +1660,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
return default
def strip_or_none(v):
    """Return v.strip() for a string, passing None through unchanged."""
    if v is None:
        return None
    return v.strip()
def parse_duration(s):
if not isinstance(s, compat_basestring):
return None
@ -2861,3 +2899,7 @@ def parse_m3u8_attributes(attrib):
val = val[1:-1]
info[key] = val
return info
def urshift(val, n):
    """Logical (unsigned) right shift of a 32-bit value by n bits.

    Negative inputs are reinterpreted as their unsigned 32-bit two's
    complement representation before shifting, matching the >>> operator
    of C-family languages.
    """
    if val < 0:
        val += 0x100000000
    return val >> n

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.06.23.1'
__version__ = '2016.07.02'