Compare commits
70 Commits
2017.02.04
...
2017.02.14
Author | SHA1 | Date | |
---|---|---|---|
58a65ba852 | |||
cedf08ff54 | |||
50de3dbad3 | |||
085f169ffe | |||
f6d6ca1db3 | |||
6e5956e6ba | |||
50fd3c2c69 | |||
89c6691f9d | |||
454e5cdb17 | |||
1de9f78e71 | |||
9dad941853 | |||
1e2c3f61fc | |||
0dac7cbb09 | |||
f8514630db | |||
459818e280 | |||
6310acf512 | |||
8d38dafbbf | |||
f3915452de | |||
2f49bcd690 | |||
68c22c4c15 | |||
9b92a5917b | |||
3e2274c8b7 | |||
3d7e3aaa0e | |||
624c4b92ff | |||
2af12ad9d2 | |||
97eb9bd2ac | |||
71cdd75628 | |||
c7d6f614f3 | |||
08a00eef79 | |||
9dd5408c99 | |||
9510709575 | |||
5abcca9060 | |||
e01bfc19c3 | |||
4d32b63851 | |||
55d4de2283 | |||
61ee556aea | |||
ff24261ba0 | |||
fbc6dc525e | |||
9150d1eb69 | |||
b7f9843bec | |||
e64b0fca14 | |||
78ef214d2d | |||
be670b8e8f | |||
37084f6641 | |||
b04975733c | |||
c8b8fb0a99 | |||
8298018273 | |||
ae8d5a5c59 | |||
b9c9cb5f79 | |||
fdf9b959bc | |||
013877298d | |||
c87f95f991 | |||
f28aeff264 | |||
242a14a1f6 | |||
d5d904ff7d | |||
5620f840f6 | |||
b7a8c1bcfa | |||
7097bffba6 | |||
2aec7256ae | |||
815482d4eb | |||
9c14fe9681 | |||
e705755739 | |||
019f4c0371 | |||
2ab2c0d1f5 | |||
caf0f5f8b7 | |||
e4e50f60b1 | |||
6ef3e65a7b | |||
6fd138bed8 | |||
49bd8d5e2e | |||
3d2c2752c5 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.02.04.1
|
[debug] youtube-dl version 2017.02.14
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -6,8 +6,14 @@ python:
|
|||||||
- "3.3"
|
- "3.3"
|
||||||
- "3.4"
|
- "3.4"
|
||||||
- "3.5"
|
- "3.5"
|
||||||
|
- "3.6"
|
||||||
sudo: false
|
sudo: false
|
||||||
script: nosetests test --verbose
|
env:
|
||||||
|
- YTDL_TEST_SET=core
|
||||||
|
- YTDL_TEST_SET=download
|
||||||
|
before_script:
|
||||||
|
- chmod +x ./devscripts/run_tests.sh
|
||||||
|
script: ./devscripts/run_tests.sh
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
- filippo.valsorda@gmail.com
|
- filippo.valsorda@gmail.com
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -201,3 +201,4 @@ Stephen Chen
|
|||||||
Fabian Stahl
|
Fabian Stahl
|
||||||
Bagira
|
Bagira
|
||||||
Odd Stråbø
|
Odd Stråbø
|
||||||
|
Philip Herzog
|
||||||
|
71
ChangeLog
71
ChangeLog
@ -1,3 +1,74 @@
|
|||||||
|
version 2017.02.14
|
||||||
|
|
||||||
|
Core
|
||||||
|
* TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085)
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [zdf] Fix extraction (#12117)
|
||||||
|
* [xtube] Fix extraction for both kinds of video id (#12088)
|
||||||
|
* [xtube] Improve title extraction (#12088)
|
||||||
|
+ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
|
||||||
|
* [bellmedia] Allow video id longer than 6 characters (#12114)
|
||||||
|
+ [limelight] Add support for referer protected videos
|
||||||
|
* [disney] Improve extraction (#4975, #11000, #11882, #11936)
|
||||||
|
* [hotstar] Improve extraction (#12096)
|
||||||
|
* [einthusan] Fix extraction (#11416)
|
||||||
|
+ [aenetworks] Add support for lifetimemovieclub.com (#12097)
|
||||||
|
* [youtube] Fix parsing codecs (#12091)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.11
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Introduce get_elements_by_class and get_elements_by_attribute
|
||||||
|
utility functions
|
||||||
|
+ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [pluralsight:course] Fix extraction (#12075)
|
||||||
|
+ [bbc] Extract m3u8 formats with 320k audio
|
||||||
|
* [facebook] Relax video id matching (#11017, #12055, #12056)
|
||||||
|
+ [corus] Add support for Corus Entertainment sites (#12060, #9164)
|
||||||
|
+ [pluralsight] Detect blocked account error message (#12070)
|
||||||
|
+ [bloomberg] Add another video id pattern (#12062)
|
||||||
|
* [extractor/commonmistakes] Restrict URL regular expression (#12050)
|
||||||
|
+ [tvplayer] Add support for tvplayer.com
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.10
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [xtube] Fix extraction (#12023)
|
||||||
|
* [pornhub] Fix extraction (#12007, #12018)
|
||||||
|
* [facebook] Improve JS data regular expression (#12042)
|
||||||
|
* [kaltura] Improve embed partner id extraction (#12041)
|
||||||
|
+ [sprout] Add support for sproutonline.com
|
||||||
|
* [6play] Improve extraction
|
||||||
|
+ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
|
||||||
|
+ [go] Add support for Adobe Pass authentication (#11468, #10831)
|
||||||
|
* [6play] Fix extraction (#12011)
|
||||||
|
+ [nbc] Add support for Adobe Pass authentication (#12006)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.02.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
|
||||||
|
+ [downloader/fragment] Respect --no-part
|
||||||
|
* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [pornhub] Fix extraction (#11997)
|
||||||
|
+ [canalplus] Add support for cstar.fr (#11990)
|
||||||
|
+ [extractor/generic] Improve RTMP support (#11993)
|
||||||
|
+ [gaskrank] Add support for gaskrank.tv (#11685)
|
||||||
|
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||||
|
* [iwara] Fix extraction (#11781)
|
||||||
|
* [googledrive] Fix extraction on Python 3.6
|
||||||
|
+ [videopress] Add support for videopress.com
|
||||||
|
+ [afreecatv] Extract RTMP formats
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.04.1
|
version 2017.02.04.1
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
19
devscripts/run_tests.sh
Normal file
19
devscripts/run_tests.sh
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter"
|
||||||
|
|
||||||
|
test_set=""
|
||||||
|
|
||||||
|
case "$YTDL_TEST_SET" in
|
||||||
|
core)
|
||||||
|
test_set="-I test_($DOWNLOAD_TESTS)\.py"
|
||||||
|
;;
|
||||||
|
download)
|
||||||
|
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
break
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
nosetests test --verbose $test_set
|
@ -11,6 +11,7 @@
|
|||||||
- **4tube**
|
- **4tube**
|
||||||
- **56.com**
|
- **56.com**
|
||||||
- **5min**
|
- **5min**
|
||||||
|
- **6play**
|
||||||
- **8tracks**
|
- **8tracks**
|
||||||
- **91porn**
|
- **91porn**
|
||||||
- **9c9media**
|
- **9c9media**
|
||||||
@ -168,6 +169,7 @@
|
|||||||
- **ComedyCentralShortname**
|
- **ComedyCentralShortname**
|
||||||
- **ComedyCentralTV**
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
|
- **Corus**
|
||||||
- **Coub**
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
- **Crackle**
|
- **Crackle**
|
||||||
@ -282,6 +284,7 @@
|
|||||||
- **Gamersyde**
|
- **Gamersyde**
|
||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
|
- **Gaskrank**
|
||||||
- **Gazeta**
|
- **Gazeta**
|
||||||
- **GDCVault**
|
- **GDCVault**
|
||||||
- **generic**: Generic downloader that works on some sites
|
- **generic**: Generic downloader that works on some sites
|
||||||
@ -307,7 +310,6 @@
|
|||||||
- **HellPorno**
|
- **HellPorno**
|
||||||
- **Helsinki**: helsinki.fi
|
- **Helsinki**: helsinki.fi
|
||||||
- **HentaiStigma**
|
- **HentaiStigma**
|
||||||
- **HGTV**
|
|
||||||
- **hgtv.com:show**
|
- **hgtv.com:show**
|
||||||
- **HistoricFilms**
|
- **HistoricFilms**
|
||||||
- **history:topic**: History.com Topic
|
- **history:topic**: History.com Topic
|
||||||
@ -666,6 +668,7 @@
|
|||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
- **ScreencastOMatic**
|
- **ScreencastOMatic**
|
||||||
|
- **scrippsnetworks:watch**
|
||||||
- **Seeker**
|
- **Seeker**
|
||||||
- **SenateISVP**
|
- **SenateISVP**
|
||||||
- **SendtoNews**
|
- **SendtoNews**
|
||||||
@ -675,7 +678,6 @@
|
|||||||
- **Shared**: shared.sx
|
- **Shared**: shared.sx
|
||||||
- **ShowRoomLive**
|
- **ShowRoomLive**
|
||||||
- **Sina**
|
- **Sina**
|
||||||
- **SixPlay**
|
|
||||||
- **skynewsarabia:article**
|
- **skynewsarabia:article**
|
||||||
- **skynewsarabia:video**
|
- **skynewsarabia:video**
|
||||||
- **SkySports**
|
- **SkySports**
|
||||||
@ -710,6 +712,7 @@
|
|||||||
- **SportBoxEmbed**
|
- **SportBoxEmbed**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
- **Sportschau**
|
- **Sportschau**
|
||||||
|
- **Sprout**
|
||||||
- **sr:mediathek**: Saarländischer Rundfunk
|
- **sr:mediathek**: Saarländischer Rundfunk
|
||||||
- **SRGSSR**
|
- **SRGSSR**
|
||||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||||
@ -803,6 +806,7 @@
|
|||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
- **tvp:embed**: Telewizja Polska
|
- **tvp:embed**: Telewizja Polska
|
||||||
- **tvp:series**
|
- **tvp:series**
|
||||||
|
- **TVPlayer**
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
@ -859,6 +863,7 @@
|
|||||||
- **videomore:season**
|
- **videomore:season**
|
||||||
- **videomore:video**
|
- **videomore:video**
|
||||||
- **VideoPremium**
|
- **VideoPremium**
|
||||||
|
- **VideoPress**
|
||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
- **Vidio**
|
- **Vidio**
|
||||||
- **vidme**
|
- **vidme**
|
||||||
|
@ -34,6 +34,9 @@ from youtube_dl.utils import (
|
|||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
|
get_element_by_attribute,
|
||||||
|
get_elements_by_class,
|
||||||
|
get_elements_by_attribute,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
is_html,
|
is_html,
|
||||||
@ -1124,6 +1127,32 @@ The first line
|
|||||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||||
|
|
||||||
|
def test_get_element_by_attribute(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
|
||||||
|
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
|
||||||
|
|
||||||
|
def test_get_elements_by_class(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
|
||||||
|
self.assertEqual(get_elements_by_class('no-such-class', html), [])
|
||||||
|
|
||||||
|
def test_get_elements_by_attribute(self):
|
||||||
|
html = '''
|
||||||
|
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||||
|
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD):
|
|||||||
args += ['-f', 'mpegts']
|
args += ['-f', 'mpegts']
|
||||||
else:
|
else:
|
||||||
args += ['-f', 'mp4']
|
args += ['-f', 'mp4']
|
||||||
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
||||||
args += ['-bsf:a', 'aac_adtstoasc']
|
args += ['-bsf:a', 'aac_adtstoasc']
|
||||||
elif protocol == 'rtmp':
|
elif protocol == 'rtmp':
|
||||||
args += ['-f', 'flv']
|
args += ['-f', 'flv']
|
||||||
|
@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
|
|||||||
'noprogress': True,
|
'noprogress': True,
|
||||||
'ratelimit': self.params.get('ratelimit'),
|
'ratelimit': self.params.get('ratelimit'),
|
||||||
'retries': self.params.get('retries', 0),
|
'retries': self.params.get('retries', 0),
|
||||||
|
'nopart': self.params.get('nopart', False),
|
||||||
'test': self.params.get('test', False),
|
'test': self.params.get('test', False),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
|||||||
class AENetworksIE(AENetworksBaseIE):
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||||
@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_DOMAIN_TO_REQUESTOR_ID = {
|
_DOMAIN_TO_REQUESTOR_ID = {
|
||||||
'history.com': 'HISTORY',
|
'history.com': 'HISTORY',
|
||||||
'aetv.com': 'AETV',
|
'aetv.com': 'AETV',
|
||||||
'mylifetime.com': 'LIFETIME',
|
'mylifetime.com': 'LIFETIME',
|
||||||
|
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||||
'fyi.tv': 'FYI',
|
'fyi.tv': 'FYI',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -221,10 +221,23 @@ class AfreecaTVGlobalIE(AfreecaTVIE):
|
|||||||
s_url = s.get('purl')
|
s_url = s.get('purl')
|
||||||
if not s_url:
|
if not s_url:
|
||||||
continue
|
continue
|
||||||
# TODO: extract rtmp formats
|
stype = s.get('stype')
|
||||||
if s.get('stype') == 'HLS':
|
if stype == 'HLS':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
s_url, channel_id, 'mp4', fatal=False))
|
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||||
|
elif stype == 'RTMP':
|
||||||
|
format_id = [stype]
|
||||||
|
label = s.get('label')
|
||||||
|
if label:
|
||||||
|
format_id.append(label)
|
||||||
|
formats.append({
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'url': s_url,
|
||||||
|
'tbr': int_or_none(s.get('bps')),
|
||||||
|
'height': int_or_none(s.get('brt')),
|
||||||
|
'ext': 'flv',
|
||||||
|
'rtmp_live': True,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info.update({
|
info.update({
|
||||||
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
# not all tracks have songs
|
||||||
|
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'we-are-the-plague',
|
||||||
|
'title': 'WE ARE THE PLAGUE',
|
||||||
|
'uploader_id': 'insulters',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
album_id = mobj.group('album_id')
|
album_id = mobj.group('album_id')
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
track_elements = re.findall(
|
||||||
if not tracks_paths:
|
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||||
|
if not track_elements:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
for t_path in tracks_paths]
|
for elem_content, t_path in track_elements
|
||||||
|
if self._html_search_meta('duration', elem_content, default=None)]
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||||
webpage, 'title', fatal=False)
|
webpage, 'title', fatal=False)
|
||||||
|
@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||||
|
|
||||||
class MediaSelectionError(Exception):
|
class MediaSelectionError(Exception):
|
||||||
def __init__(self, id):
|
def __init__(self, id):
|
||||||
self.id = id
|
self.id = id
|
||||||
@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
|
if re.search(self._USP_RE, href):
|
||||||
|
usp_formats = self._extract_m3u8_formats(
|
||||||
|
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||||
|
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for f in usp_formats:
|
||||||
|
if f.get('height') and f['height'] > 720:
|
||||||
|
continue
|
||||||
|
formats.append(f)
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
|
|||||||
space
|
space
|
||||||
)\.ca|
|
)\.ca|
|
||||||
much\.com
|
much\.com
|
||||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||||
@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_DOMAINS = {
|
_DOMAINS = {
|
||||||
'thecomedynetwork': 'comedy',
|
'thecomedynetwork': 'comedy',
|
||||||
|
@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'format': 'best[format_id^=hds]',
|
'format': 'best[format_id^=hds]',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# data-bmmrid=
|
||||||
|
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
|
|||||||
name = self._match_id(url)
|
name = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
webpage, 'id', group='url', default=None)
|
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
|
||||||
|
webpage, 'id', group='id', default=None)
|
||||||
if not video_id:
|
if not video_id:
|
||||||
bplayer_data = self._parse_json(self._search_regex(
|
bplayer_data = self._parse_json(self._search_regex(
|
||||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||||
|
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
(?:www\.)?d8\.tv|
|
(?:www\.)?d8\.tv|
|
||||||
(?:www\.)?c8\.fr|
|
(?:www\.)?c8\.fr|
|
||||||
(?:www\.)?d17\.tv|
|
(?:www\.)?d17\.tv|
|
||||||
|
(?:(?:football|www)\.)?cstar\.fr|
|
||||||
(?:www\.)?itele\.fr
|
(?:www\.)?itele\.fr
|
||||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||||
@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'd8': 'd8',
|
'd8': 'd8',
|
||||||
'c8': 'd8',
|
'c8': 'd8',
|
||||||
'd17': 'd17',
|
'd17': 'd17',
|
||||||
|
'cstar': 'd17',
|
||||||
'itele': 'itele',
|
'itele': 'itele',
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,6 +88,19 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||||
'upload_date': '20161014',
|
'upload_date': '20161014',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1416769',
|
||||||
|
'display_id': 'pid7566-feminines-videos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||||
|
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||||
|
'upload_date': '20160921',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -1208,6 +1208,9 @@ class InfoExtractor(object):
|
|||||||
m3u8_doc, urlh = res
|
m3u8_doc, urlh = res
|
||||||
m3u8_url = urlh.geturl()
|
m3u8_url = urlh.geturl()
|
||||||
|
|
||||||
|
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||||
|
return []
|
||||||
|
|
||||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||||
|
|
||||||
format_url = lambda u: (
|
format_url = lambda u: (
|
||||||
@ -1315,8 +1318,8 @@ class InfoExtractor(object):
|
|||||||
'abr': abr,
|
'abr': abr,
|
||||||
})
|
})
|
||||||
f.update(parse_codecs(last_info.get('CODECS')))
|
f.update(parse_codecs(last_info.get('CODECS')))
|
||||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||||
f['acodec'] = 'none'
|
f['acodec'] = 'none'
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
@ -1959,7 +1962,12 @@ class InfoExtractor(object):
|
|||||||
media_tags = [(media_tag, media_type, '')
|
media_tags = [(media_tag, media_type, '')
|
||||||
for media_tag, media_type
|
for media_tag, media_type
|
||||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
media_tags.extend(re.findall(
|
||||||
|
# We only allow video|audio followed by a whitespace or '>'.
|
||||||
|
# Allowing more characters may end up in significant slow down (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||||
|
# http://www.porntrex.com/maps/videositemap.xml).
|
||||||
|
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||||
for media_tag, media_type, media_content in media_tags:
|
for media_tag, media_type, media_content in media_tags:
|
||||||
media_info = {
|
media_info = {
|
||||||
'formats': [],
|
'formats': [],
|
||||||
|
@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
|||||||
class CommonMistakesIE(InfoExtractor):
|
class CommonMistakesIE(InfoExtractor):
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:url|URL)
|
(?:url|URL)$
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
72
youtube_dl/extractor/corus.py
Normal file
72
youtube_dl/extractor/corus.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .theplatform import ThePlatformFeedIE
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class CorusIE(ThePlatformFeedIE):
    """Extractor for the Corus sites matched by ``_VALID_URL``.

    Covers globaltv.com, etcanada.com, hgtv.ca, foodnetwork.ca and slice.ca.
    The actual feed lookup is delegated to ``_extract_feed_info`` inherited
    from ``ThePlatformFeedIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
        'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
        'info_dict': {
            'id': '870923331648',
            'ext': 'mp4',
            'title': 'Movie Night Popcorn with Bryan',
            'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
            'uploader': 'SHWM-NEW',
            'upload_date': '20170206',
            'timestamp': 1486392197,
        },
    }, {
        'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
        'only_matching': True,
    }, {
        'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
        'only_matching': True,
    }]

    # Feed and account identifiers, keyed by the first label of the matched
    # domain (e.g. 'hgtv' for 'hgtv.ca').
    _TP_FEEDS = {
        'globaltv': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'etcanada': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'hgtv': {
            'feed_id': 'L0BMHXi2no43',
            'account_id': 2414428465,
        },
        'foodnetwork': {
            'feed_id': 'ukK8o58zbRmJ',
            'account_id': 2414429569,
        },
        'slice': {
            'feed_id': '5tUJLgV2YNJ5',
            'account_id': 2414427935,
        },
    }

    def _real_extract(self, url):
        """Resolve *url* to its site feed and hand off to the feed extractor."""
        mobj = re.match(self._VALID_URL, url)
        site_domain = mobj.group('domain')
        video_id = mobj.group('id')

        # 'hgtv.ca' -> 'hgtv', 'globaltv.com' -> 'globaltv'
        site_key = site_domain.partition('.')[0]
        feed = self._TP_FEEDS[site_key]

        def series_fields(e):
            # NOTE(review): `e` is presumably a single feed entry dict;
            # confirm against ThePlatformFeedIE._extract_feed_info.
            return {
                'episode_number': int_or_none(e.get('pl1$episode')),
                'season_number': int_or_none(e.get('pl1$season')),
                'series': e.get('pl1$show'),
            }

        # Every listed asset type is requested with the same manifest value;
        # each gets its own dict so nothing is shared between entries.
        asset_types = dict(
            (asset_type, {'manifest': 'm3u'})
            for asset_type in ('HLS', 'DesktopHLS Default', 'MP4 MBR'))

        return self._extract_feed_info(
            'dtjsEC', feed['feed_id'], 'byId=%s' % video_id, video_id,
            series_fields, asset_types, feed['account_id'])
|
@ -9,13 +9,15 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
compat_str,
|
compat_str,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DisneyIE(InfoExtractor):
|
class DisneyIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})'''
|
https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Disney.EmbedVideo
|
||||||
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '545ed1857afee5a0ec239977',
|
'id': '545ed1857afee5a0ec239977',
|
||||||
@ -28,6 +30,20 @@ class DisneyIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# Grill.burger
|
||||||
|
'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5454e9f4e9804a552e3524c8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
|
||||||
|
'upload_date': '20170104',
|
||||||
|
'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
|
'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
|
'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(
|
if not video_id:
|
||||||
'http://%s/embed/%s' % (domain, video_id), video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_data = self._parse_json(self._search_regex(
|
grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
|
||||||
r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video']
|
r'Grill\.burger\s*=\s*({.+})\s*:',
|
||||||
|
webpage, 'grill data'))
|
||||||
|
page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
|
||||||
|
video_data = page_data['data'][0]
|
||||||
|
else:
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://%s/embed/%s' % (domain, video_id), video_id)
|
||||||
|
page_data = self._parse_json(self._search_regex(
|
||||||
|
r'Disney\.EmbedVideo\s*=\s*({.+});',
|
||||||
|
webpage, 'embed data'), video_id)
|
||||||
|
video_data = page_data['video']
|
||||||
|
|
||||||
for external in video_data.get('externals', []):
|
for external in video_data.get('externals', []):
|
||||||
if external.get('source') == 'vevo':
|
if external.get('source') == 'vevo':
|
||||||
return self.url_result('vevo:' + external['data_id'], 'Vevo')
|
return self.url_result('vevo:' + external['data_id'], 'Vevo')
|
||||||
|
|
||||||
|
video_id = video_data['id']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for flavor in video_data.get('flavors', []):
|
for flavor in video_data.get('flavors', []):
|
||||||
flavor_format = flavor.get('format')
|
flavor_format = flavor.get('format')
|
||||||
flavor_url = flavor.get('url')
|
flavor_url = flavor.get('url')
|
||||||
if not flavor_url or not re.match(r'https?://', flavor_url):
|
if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(flavor.get('bitrate'))
|
tbr = int_or_none(flavor.get('bitrate'))
|
||||||
if tbr == 99999:
|
if tbr == 99999:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False))
|
flavor_url, video_id, 'mp4',
|
||||||
|
m3u8_id=flavor_format, fatal=False))
|
||||||
continue
|
continue
|
||||||
format_id = []
|
format_id = []
|
||||||
if flavor_format:
|
if flavor_format:
|
||||||
@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor):
|
|||||||
'ext': ext,
|
'ext': ext,
|
||||||
'vcodec': 'none' if (width == 0 and height == 0) else None,
|
'vcodec': 'none' if (width == 0 and height == 0) else None,
|
||||||
})
|
})
|
||||||
|
if not formats and video_data.get('expired'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
|
||||||
|
expected=True)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -1,67 +1,94 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
remove_start,
|
extract_attributes,
|
||||||
sanitized_Request,
|
ExtractorError,
|
||||||
|
get_elements_by_class,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EinthusanIE(InfoExtractor):
|
class EinthusanIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TEST = {
|
||||||
{
|
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': '9097',
|
||||||
'id': '2447',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Ae Dil Hai Mushkil',
|
||||||
'title': 'Ek Villain',
|
'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
}
|
||||||
}
|
}
|
||||||
},
|
|
||||||
{
|
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
def _decrypt(self, encrypted_data, video_id):
|
||||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
return self._parse_json(base64.b64decode((
|
||||||
'info_dict': {
|
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
|
||||||
'id': '1671',
|
).encode('ascii')).decode('utf-8'), video_id)
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Soodhu Kavvuum',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
request = sanitized_Request(url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
|
|
||||||
webpage = self._download_webpage(request, video_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
|
||||||
r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
|
|
||||||
webpage, 'title')
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
player_params = extract_attributes(self._search_regex(
|
||||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
|
||||||
|
|
||||||
m3u8_url = self._download_webpage(
|
page_id = self._html_search_regex(
|
||||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||||
% video_id, video_id, headers={'Referer': url})
|
video_data = self._download_json(
|
||||||
formats = self._extract_m3u8_formats(
|
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
data=urlencode_postdata({
|
||||||
|
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||||
|
'xJson': json.dumps({
|
||||||
|
'EJOutcomes': player_params['data-ejpingables'],
|
||||||
|
'NativeHLS': False
|
||||||
|
}),
|
||||||
|
'arcVersion': 3,
|
||||||
|
'appVersion': 59,
|
||||||
|
'gorilla.csrf.Token': page_id,
|
||||||
|
}))['Data']
|
||||||
|
|
||||||
description = self._html_search_meta('description', webpage)
|
if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Download rate reached. Please try again later.', expected=True)
|
||||||
|
|
||||||
|
ej_links = self._decrypt(video_data['EJLinks'], video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
m3u8_url = ej_links.get('HLSLink')
|
||||||
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||||
|
|
||||||
|
mp4_url = ej_links.get('MP4Link')
|
||||||
|
if mp4_url:
|
||||||
|
formats.append({
|
||||||
|
'url': mp4_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = get_elements_by_class('synopsis', webpage)[0]
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
|
||||||
webpage, "thumbnail url", fatal=False)
|
webpage, 'thumbnail url', fatal=False, group='url')
|
||||||
if thumbnail is not None:
|
if thumbnail is not None:
|
||||||
thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
|
thumbnail = compat_urlparse.urljoin(url, thumbnail)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -202,6 +202,7 @@ from .commonprotocols import (
|
|||||||
RtmpIE,
|
RtmpIE,
|
||||||
)
|
)
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
|
from .corus import CorusIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
@ -349,6 +350,7 @@ from .gameone import (
|
|||||||
from .gamersyde import GamersydeIE
|
from .gamersyde import GamersydeIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
|
from .gaskrank import GaskrankIE
|
||||||
from .gazeta import GazetaIE
|
from .gazeta import GazetaIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
@ -380,10 +382,7 @@ from .heise import HeiseIE
|
|||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hgtv import (
|
from .hgtv import HGTVComShowIE
|
||||||
HGTVIE,
|
|
||||||
HGTVComShowIE,
|
|
||||||
)
|
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hitrecord import HitRecordIE
|
from .hitrecord import HitRecordIE
|
||||||
@ -837,6 +836,7 @@ from .sbs import SBSIE
|
|||||||
from .scivee import SciVeeIE
|
from .scivee import SciVeeIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .screencastomatic import ScreencastOMaticIE
|
from .screencastomatic import ScreencastOMaticIE
|
||||||
|
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||||
from .seeker import SeekerIE
|
from .seeker import SeekerIE
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
@ -894,6 +894,7 @@ from .sport5 import Sport5IE
|
|||||||
from .sportbox import SportBoxEmbedIE
|
from .sportbox import SportBoxEmbedIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .sportschau import SportschauIE
|
from .sportschau import SportschauIE
|
||||||
|
from .sprout import SproutIE
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
SRGSSRIE,
|
SRGSSRIE,
|
||||||
SRGSSRPlayIE,
|
SRGSSRPlayIE,
|
||||||
@ -1016,6 +1017,7 @@ from .tvplay import (
|
|||||||
TVPlayIE,
|
TVPlayIE,
|
||||||
ViafreeIE,
|
ViafreeIE,
|
||||||
)
|
)
|
||||||
|
from .tvplayer import TVPlayerIE
|
||||||
from .tweakers import TweakersIE
|
from .tweakers import TweakersIE
|
||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
@ -1095,6 +1097,7 @@ from .videomore import (
|
|||||||
VideomoreSeasonIE,
|
VideomoreSeasonIE,
|
||||||
)
|
)
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
|
from .videopress import VideoPressIE
|
||||||
from .vidio import VidioIE
|
from .vidio import VidioIE
|
||||||
from .vidme import (
|
from .vidme import (
|
||||||
VidmeIE,
|
VidmeIE,
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -134,6 +135,46 @@ class FacebookIE(InfoExtractor):
|
|||||||
'upload_date': '20161030',
|
'upload_date': '20161030',
|
||||||
'uploader': 'CNN',
|
'uploader': 'CNN',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||||
|
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1417995061575415',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||||
|
'timestamp': 1486648217,
|
||||||
|
'upload_date': '20170209',
|
||||||
|
'uploader': 'Yaroslav Korpan',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1072691702860471',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
|
||||||
|
'timestamp': 1477305000,
|
||||||
|
'upload_date': '20161024',
|
||||||
|
'uploader': 'La Guía Del Varón',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1396382447100162',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||||
|
'timestamp': 1486035494,
|
||||||
|
'upload_date': '20170202',
|
||||||
|
'uploader': 'Elisabeth Ahtn',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -249,7 +290,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
for item in instances:
|
for item in instances:
|
||||||
if item[1][0] == 'VideoConfig':
|
if item[1][0] == 'VideoConfig':
|
||||||
video_item = item[2][0]
|
video_item = item[2][0]
|
||||||
if video_item.get('video_id') == video_id:
|
if video_item.get('video_id'):
|
||||||
return video_item['videoData']
|
return video_item['videoData']
|
||||||
|
|
||||||
server_js_data = self._parse_json(self._search_regex(
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
@ -262,7 +303,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
|
||||||
webpage, 'js data', default='{}'),
|
webpage, 'js data', default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
if server_js_data:
|
if server_js_data:
|
||||||
|
123
youtube_dl/extractor/gaskrank.py
Normal file
123
youtube_dl/extractor/gaskrank.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GaskrankIE(InfoExtractor):
    """InfoExtractor for gaskrank.tv"""
    _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
    _TESTS = [
        {
            'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
            'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
            'info_dict': {
                'id': '201601/26955',
                'ext': 'mp4',
                'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['motorrad-fun'],
                'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
                'uploader_id': 'Bikefun',
                'upload_date': '20170110',
                'uploader_url': None,
            }
        },
        {
            'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
            'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
            'info_dict': {
                'id': '201106/15920',
                'ext': 'mp4',
                'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['racing'],
                'display_id': 'isle-of-man-tt-2011-michael-du-15920',
                'uploader_id': 'IOM',
                'upload_date': '20160506',
                'uploader_url': 'www.iomtt.com',
            }
        }
    ]

    def _real_extract(self, url):
        """Extract information from a gaskrank.tv video page.

        The title and the player playlist are required: if either cannot be
        found, the underlying search/parse helper is left to report the
        failure. Every other field is optional and is returned as None
        (or an empty list for tags) when the page does not carry it.
        """
        def fix_json(code):
            """Removes trailing comma in json: {{},} --> {{}}"""
            return re.sub(r',\s*}', r'}', js_to_json(code))

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        categories = [re.match(self._VALID_URL, url).group('categories')]
        title = self._search_regex(
            r'movieName\s*:\s*\'([^\']*)\'',
            webpage, 'title')
        thumbnail = self._search_regex(
            r'poster\s*:\s*\'([^\']*)\'',
            webpage, 'thumbnail', default=None)

        # Pre-initialise: the uploader line is optional, and leaving these
        # unbound made the final return raise UnboundLocalError.
        uploader_id = None
        upload_date = None
        mobj = re.search(
            r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
            webpage)
        if mobj is not None:
            uploader_id = mobj.group('uploader_id')
            upload_date = unified_strdate(mobj.group('upload_date'))

        uploader_url = self._search_regex(
            r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
            webpage, 'uploader_url', default=None)
        tags = re.findall(
            r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
            webpage)

        view_count = self._search_regex(
            r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
            webpage, 'view_count', default=None)
        # The pattern allows a zero-length match, so normalise '' to None
        # instead of returning an empty string as the count.
        view_count = int_or_none(view_count.replace('.', '')) if view_count else None

        # The rating is optional metadata; its absence must not abort
        # extraction, hence the explicit default.
        average_rating = self._search_regex(
            r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
            webpage, 'average_rating', default=None)
        if average_rating:
            average_rating = float_or_none(average_rating.replace(',', '.'))

        # The playlist is mandatory (it carries the media URLs), so neither
        # the search nor the parse is softened: a missing or broken playlist
        # is reported by the helpers rather than as an AttributeError below.
        playlist = self._parse_json(
            self._search_regex(
                r'playlist\s*:\s*\[([^\]]*)\]',
                webpage, 'playlist'),
            display_id, transform_source=fix_json)

        # The video id is taken from the media URL of entry '0'.
        first_entry = playlist.get('0') or {}
        video_id = self._search_regex(
            r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
            first_entry.get('src') or '', 'video id')

        formats = []
        for key, entry in playlist.items():
            src = entry.get('src')
            if not src:
                # Skip entries without a media URL instead of raising KeyError.
                continue
            formats.append({
                'url': src,
                'format_id': key,
                'quality': entry.get('quality')})
        self._sort_formats(formats, field_preference=['format_id'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'categories': categories,
            'display_id': display_id,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'uploader_url': uploader_url,
            'tags': tags,
            'view_count': view_count,
            'average_rating': average_rating,
        }
|
@ -29,6 +29,7 @@ from ..utils import (
|
|||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
from .commonprotocols import RtmpIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
BrightcoveLegacyIE,
|
BrightcoveLegacyIE,
|
||||||
BrightcoveNewIE,
|
BrightcoveNewIE,
|
||||||
@ -81,6 +82,7 @@ from .videa import VideaIE
|
|||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .openload import OpenloadIE
|
from .openload import OpenloadIE
|
||||||
|
from .videopress import VideoPressIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -946,6 +948,19 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# jwplayer rtmp
|
||||||
|
{
|
||||||
|
'url': 'http://www.suffolk.edu/sjc/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sjclive',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
|
||||||
|
'uploader': 'www.suffolk.edu',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
# rtl.nl embed
|
# rtl.nl embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||||
@ -976,19 +991,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Kaltura embed protected with referrer
|
|
||||||
{
|
|
||||||
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1_g4fbemnq',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Violetta - Achter De Schermen - Ruggero',
|
|
||||||
'description': 'Achter de schermen met Ruggero',
|
|
||||||
'timestamp': 1435133761,
|
|
||||||
'upload_date': '20150624',
|
|
||||||
'uploader_id': 'echojecka',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Kaltura embed with single quotes
|
# Kaltura embed with single quotes
|
||||||
{
|
{
|
||||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||||
@ -1473,6 +1475,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# VideoPress embed
|
||||||
|
'url': 'https://en.support.wordpress.com/videopress/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OcobLTqC',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'IMG_5786',
|
||||||
|
'timestamp': 1435711927,
|
||||||
|
'upload_date': '20150701',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [VideoPressIE.ie_key()],
|
||||||
}
|
}
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
@ -2320,8 +2337,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'Channel': 'channel',
|
'Channel': 'channel',
|
||||||
'ChannelList': 'channel_list',
|
'ChannelList': 'channel_list',
|
||||||
}
|
}
|
||||||
return self.url_result('limelight:%s:%s' % (
|
return self.url_result(smuggle_url('limelight:%s:%s' % (
|
||||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
|
||||||
|
'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'''(?sx)
|
r'''(?sx)
|
||||||
@ -2331,7 +2349,9 @@ class GenericIE(InfoExtractor):
|
|||||||
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
|
||||||
''', webpage)
|
''', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return self.url_result('limelight:media:%s' % mobj.group('id'))
|
return self.url_result(smuggle_url(
|
||||||
|
'limelight:media:%s' % mobj.group('id'),
|
||||||
|
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
||||||
|
|
||||||
# Look for AdobeTVVideo embeds
|
# Look for AdobeTVVideo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@ -2438,6 +2458,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
openload_urls, ie=OpenloadIE.ie_key())
|
openload_urls, ie=OpenloadIE.ie_key())
|
||||||
|
|
||||||
|
# Look for VideoPress embeds
|
||||||
|
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||||
|
if videopress_urls:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
videopress_urls, ie=VideoPressIE.ie_key())
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
@ -2465,6 +2491,8 @@ class GenericIE(InfoExtractor):
|
|||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
if RtmpIE.suitable(vurl):
|
||||||
|
return True
|
||||||
vpath = compat_urlparse.urlparse(vurl).path
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
vext = determine_ext(vpath)
|
vext = determine_ext(vpath)
|
||||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||||
@ -2572,6 +2600,15 @@ class GenericIE(InfoExtractor):
|
|||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if RtmpIE.suitable(video_url):
|
||||||
|
entry_info_dict.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': RtmpIE.ie_key(),
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
entries.append(entry_info_dict)
|
||||||
|
continue
|
||||||
|
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'smil':
|
if ext == 'smil':
|
||||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .adobepass import AdobePassIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -13,15 +13,30 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GoIE(InfoExtractor):
|
class GoIE(AdobePassIE):
|
||||||
_BRANDS = {
|
_SITE_INFO = {
|
||||||
'abc': '001',
|
'abc': {
|
||||||
'freeform': '002',
|
'brand': '001',
|
||||||
'watchdisneychannel': '004',
|
'requestor_id': 'ABC',
|
||||||
'watchdisneyjunior': '008',
|
},
|
||||||
'watchdisneyxd': '009',
|
'freeform': {
|
||||||
|
'brand': '002',
|
||||||
|
'requestor_id': 'ABCFamily',
|
||||||
|
},
|
||||||
|
'watchdisneychannel': {
|
||||||
|
'brand': '004',
|
||||||
|
'requestor_id': 'Disney',
|
||||||
|
},
|
||||||
|
'watchdisneyjunior': {
|
||||||
|
'brand': '008',
|
||||||
|
'requestor_id': 'DisneyJunior',
|
||||||
|
},
|
||||||
|
'watchdisneyxd': {
|
||||||
|
'brand': '009',
|
||||||
|
'requestor_id': 'DisneyXD',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
|
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -47,7 +62,8 @@ class GoIE(InfoExtractor):
|
|||||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||||
brand = self._BRANDS[sub_domain]
|
site_info = self._SITE_INFO[sub_domain]
|
||||||
|
brand = site_info['brand']
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||||
video_id)['video'][0]
|
video_id)['video'][0]
|
||||||
@ -63,14 +79,26 @@ class GoIE(InfoExtractor):
|
|||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
video_type = video_data.get('type')
|
video_type = video_data.get('type')
|
||||||
if video_type == 'lf':
|
if video_type == 'lf':
|
||||||
|
data = {
|
||||||
|
'video_id': video_data['id'],
|
||||||
|
'video_type': video_type,
|
||||||
|
'brand': brand,
|
||||||
|
'device': '001',
|
||||||
|
}
|
||||||
|
if video_data.get('accesslevel') == '1':
|
||||||
|
requestor_id = site_info['requestor_id']
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
requestor_id, title, video_id, None)
|
||||||
|
auth = self._extract_mvpd_auth(
|
||||||
|
url, video_id, requestor_id, resource)
|
||||||
|
data.update({
|
||||||
|
'token': auth,
|
||||||
|
'token_type': 'ap',
|
||||||
|
'adobe_requestor_id': requestor_id,
|
||||||
|
})
|
||||||
entitlement = self._download_json(
|
entitlement = self._download_json(
|
||||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||||
video_id, data=urlencode_postdata({
|
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||||
'video_id': video_data['id'],
|
|
||||||
'video_type': video_type,
|
|
||||||
'brand': brand,
|
|
||||||
'device': '001',
|
|
||||||
}))
|
|
||||||
errors = entitlement.get('errors', {}).get('errors', [])
|
errors = entitlement.get('errors', {}).get('errors', [])
|
||||||
if errors:
|
if errors:
|
||||||
error_message = ', '.join([error['message'] for error in errors])
|
error_message = ', '.join([error['message'] for error in errors])
|
||||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
lowercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny.mp4',
|
'title': 'Big Buck Bunny.mp4',
|
||||||
'duration': 46,
|
'duration': 45,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||||
|
|
||||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||||
if reason:
|
if reason:
|
||||||
@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
resolution = fmt.split('/')[1]
|
resolution = fmt.split('/')[1]
|
||||||
width, height = resolution.split('x')
|
width, height = resolution.split('x')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': fmt_url,
|
'url': lowercase_escape(fmt_url),
|
||||||
'format_id': fmt_id,
|
'format_id': fmt_id,
|
||||||
'resolution': resolution,
|
'resolution': resolution,
|
||||||
'width': int_or_none(width),
|
'width': int_or_none(width),
|
||||||
|
@ -2,50 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
js_to_json,
|
|
||||||
smuggle_url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class HGTVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
|
|
||||||
'md5': '',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'aFH__I_5FBOX',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Overnight Success',
|
|
||||||
'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
|
|
||||||
'uploader': 'SHWM-NEW',
|
|
||||||
'timestamp': 1470320034,
|
|
||||||
'upload_date': '20160804',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
embed_vars = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)embed_vars\s*=\s*({.*?});',
|
|
||||||
webpage, 'embed vars'), display_id, js_to_json)
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': smuggle_url(
|
|
||||||
'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
|
|
||||||
'force_smil_url': True
|
|
||||||
}),
|
|
||||||
'series': embed_vars.get('show'),
|
|
||||||
'season_number': int_or_none(embed_vars.get('season')),
|
|
||||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class HGTVComShowIE(InfoExtractor):
|
class HGTVComShowIE(InfoExtractor):
|
||||||
|
@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
|
||||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
json_data = super(HotStarIE, self)._download_json(
|
||||||
|
url_or_request, video_id, note, fatal=fatal, query=query)
|
||||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
|
|
||||||
json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
|
|
||||||
if json_data['resultCode'] != 'OK':
|
if json_data['resultCode'] != 'OK':
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(json_data['errorDescription'])
|
raise ExtractorError(json_data['errorDescription'])
|
||||||
@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
self._GET_CONTENT_TEMPLATE % video_id,
|
'http://account.hotstar.com/AVS/besc', video_id, query={
|
||||||
video_id)['contentInfo'][0]
|
'action': 'GetAggregatedContentDetails',
|
||||||
|
'channel': 'PCTV',
|
||||||
|
'contentId': video_id,
|
||||||
|
})['contentInfo'][0]
|
||||||
|
title = video_data['episodeTitle']
|
||||||
|
|
||||||
|
if video_data.get('encrypted') == 'Y':
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
# PCTV for extracting f4m manifest
|
for f in ('JIO',):
|
||||||
for f in ('TABLET',):
|
|
||||||
format_data = self._download_json(
|
format_data = self._download_json(
|
||||||
self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
|
'http://getcdn.hotstar.com/AVS/besc',
|
||||||
video_id, 'Downloading %s JSON metadata' % f, fatal=False)
|
video_id, 'Downloading %s JSON metadata' % f,
|
||||||
|
fatal=False, query={
|
||||||
|
'action': 'GetCDN',
|
||||||
|
'asJson': 'Y',
|
||||||
|
'channel': f,
|
||||||
|
'id': video_id,
|
||||||
|
'type': 'VOD',
|
||||||
|
})
|
||||||
if format_data:
|
if format_data:
|
||||||
format_url = format_data['src']
|
format_url = format_data.get('src')
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
# produce broken files
|
# produce broken files
|
||||||
continue
|
continue
|
||||||
@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_data['episodeTitle'],
|
'title': title,
|
||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||||
|
'series': video_data.get('contentTitle'),
|
||||||
}
|
}
|
||||||
|
@ -173,11 +173,12 @@ class IqiyiIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
||||||
'md5': '667171934041350c5de3f5015f7f1152',
|
'md5': 'b7dc800a4004b1b57749d9abae0472da',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
|
# This can be either Simplified Chinese or Traditional Chinese
|
||||||
|
'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to China',
|
'skip': 'Geo-restricted to China',
|
||||||
}, {
|
}, {
|
||||||
|
@ -3,14 +3,18 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class IwaraIE(InfoExtractor):
|
class IwaraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||||
'md5': '1d53866b2c514b23ed69e4352fdc9839',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'amVwUl1EHpAD9RD',
|
'id': 'amVwUl1EHpAD9RD',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
|
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'add_ie': ['GoogleDrive'],
|
'add_ie': ['GoogleDrive'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||||
'md5': '1d85f1e5217d2791626cff5ec83bb189',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6liAP9s2Ojc',
|
'id': '6liAP9s2Ojc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 0,
|
'age_limit': 18,
|
||||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||||
'upload_date': '20160910',
|
'upload_date': '20160910',
|
||||||
@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
|
|||||||
# ecchi is 'sexy' in Japanese
|
# ecchi is 'sexy' in Japanese
|
||||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||||
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
if not entries:
|
if not video_data:
|
||||||
iframe_url = self._html_search_regex(
|
iframe_url = self._html_search_regex(
|
||||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||||
webpage, 'iframe URL', group='url')
|
webpage, 'iframe URL', group='url')
|
||||||
@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
|
|||||||
title = remove_end(self._html_search_regex(
|
title = remove_end(self._html_search_regex(
|
||||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||||
|
|
||||||
info_dict = entries[0]
|
formats = []
|
||||||
info_dict.update({
|
for a_format in video_data:
|
||||||
|
format_id = a_format.get('resolution')
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)p', format_id, 'height', default=None))
|
||||||
|
formats.append({
|
||||||
|
'url': a_format['uri'],
|
||||||
|
'format_id': format_id,
|
||||||
|
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||||
|
'height': height,
|
||||||
|
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||||
|
'quality': 1 if format_id == 'Source' else 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
})
|
'formats': formats,
|
||||||
|
}
|
||||||
return info_dict
|
|
||||||
|
@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
|
kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
|
||||||
https?://
|
https?://
|
||||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
# flash player
|
# flash player
|
||||||
index\.php/kwidget|
|
index\.php/(?:kwidget|extwidget/preview)|
|
||||||
# html5 player
|
# html5 player
|
||||||
html5/html5lib/[^/]+/mwEmbedFrame\.php
|
html5/html5lib/[^/]+/mwEmbedFrame\.php
|
||||||
)
|
)
|
||||||
@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
re.search(
|
re.search(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?P<q1>["\'])
|
(?P<q1>["\'])
|
||||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||||
(?P=q1).*?
|
(?P=q1).*?
|
||||||
(?:
|
(?:
|
||||||
entry_?[Ii]d|
|
entry_?[Ii]d|
|
||||||
@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
|
|||||||
partner_id = params['wid'][0][1:]
|
partner_id = params['wid'][0][1:]
|
||||||
elif 'p' in params:
|
elif 'p' in params:
|
||||||
partner_id = params['p'][0]
|
partner_id = params['p'][0]
|
||||||
|
elif 'partner_id' in params:
|
||||||
|
partner_id = params['partner_id'][0]
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Invalid URL', expected=True)
|
raise ExtractorError('Invalid URL', expected=True)
|
||||||
if 'entry_id' in params:
|
if 'entry_id' in params:
|
||||||
|
@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
|
'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
|
||||||
'md5': '01fb3c92de4c12c573343d63e163d302',
|
'md5': 'da120c8722d8632eec6ced937536cc98',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'lqm3kl',
|
'id': 'lqm3kl',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
|
'title': "Comprendre l'affaire Bygmalion en 5 minutes",
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 320,
|
'duration': 309,
|
||||||
'upload_date': '20160119',
|
'upload_date': '20160119',
|
||||||
'timestamp': 1453194778,
|
'timestamp': 1453194778,
|
||||||
'uploader_id': '3pmkp',
|
'uploader_id': '3pmkp',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# standard iframe embed
|
||||||
|
'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'uzsxms',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 325,
|
||||||
|
'upload_date': '20161021',
|
||||||
|
'timestamp': 1477044540,
|
||||||
|
'uploader_id': '3pmkp',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
|
'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# YouTube embeds
|
||||||
|
'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor):
|
|||||||
|
|
||||||
digiteka_url = self._proto_relative_url(self._search_regex(
|
digiteka_url = self._proto_relative_url(self._search_regex(
|
||||||
r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
|
r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
|
||||||
webpage, 'digiteka url', group='url'))
|
webpage, 'digiteka url', group='url', default=None))
|
||||||
return self.url_result(digiteka_url, 'Digiteka')
|
|
||||||
|
if digiteka_url:
|
||||||
|
return self.url_result(digiteka_url, 'Digiteka')
|
||||||
|
|
||||||
|
return self.url_result(url, 'Generic')
|
||||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -15,20 +16,23 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||||
|
|
||||||
def _call_playlist_service(self, item_id, method, fatal=True):
|
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||||
|
headers = {}
|
||||||
|
if referer:
|
||||||
|
headers['Referer'] = referer
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
|
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
||||||
|
|
||||||
def _call_api(self, organization_id, item_id, method):
|
def _call_api(self, organization_id, item_id, method):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
self._API_URL % (organization_id, self._API_PATH, item_id, method),
|
self._API_URL % (organization_id, self._API_PATH, item_id, method),
|
||||||
item_id, 'Downloading API %s JSON' % method)
|
item_id, 'Downloading API %s JSON' % method)
|
||||||
|
|
||||||
def _extract(self, item_id, pc_method, mobile_method, meta_method):
|
def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
|
||||||
pc = self._call_playlist_service(item_id, pc_method)
|
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||||
metadata = self._call_api(pc['orgId'], item_id, meta_method)
|
metadata = self._call_api(pc['orgId'], item_id, meta_method)
|
||||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
|
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
|
||||||
return pc, mobile, metadata
|
return pc, mobile, metadata
|
||||||
|
|
||||||
def _extract_info(self, streams, mobile_urls, properties):
|
def _extract_info(self, streams, mobile_urls, properties):
|
||||||
@ -207,10 +211,13 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
_API_PATH = 'media'
|
_API_PATH = 'media'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
pc, mobile, metadata = self._extract(
|
pc, mobile, metadata = self._extract(
|
||||||
video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
|
video_id, 'getPlaylistByMediaId',
|
||||||
|
'getMobilePlaylistByMediaId', 'properties',
|
||||||
|
smuggled_data.get('source_url'))
|
||||||
|
|
||||||
return self._extract_info(
|
return self._extract_info(
|
||||||
pc['playlistItems'][0].get('streams', []),
|
pc['playlistItems'][0].get('streams', []),
|
||||||
@ -247,11 +254,13 @@ class LimelightChannelIE(LimelightBaseIE):
|
|||||||
_API_PATH = 'channels'
|
_API_PATH = 'channels'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
pc, mobile, medias = self._extract(
|
pc, mobile, medias = self._extract(
|
||||||
channel_id, 'getPlaylistByChannelId',
|
channel_id, 'getPlaylistByChannelId',
|
||||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
|
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
|
||||||
|
'media', smuggled_data.get('source_url'))
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self._extract_info(
|
self._extract_info(
|
||||||
|
@ -4,23 +4,26 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
|
from .adobepass import AdobePassIE
|
||||||
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NBCIE(InfoExtractor):
|
class NBCIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '112966',
|
'id': '2848237',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
|
|||||||
# HLS streams requires the 'hdnea3' cookie
|
# HLS streams requires the 'hdnea3' cookie
|
||||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'n1806',
|
'id': '101528f5a9e8127b107e98c5e6ce4638',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Goliath',
|
'title': 'Goliath',
|
||||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||||
@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
info = {
|
||||||
[
|
|
||||||
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
|
||||||
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
|
||||||
r'"embedURL"\s*:\s*"([^"]+)"'
|
|
||||||
],
|
|
||||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
|
||||||
if theplatform_url.startswith('//'):
|
|
||||||
theplatform_url = 'http:' + theplatform_url
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
video_data = None
|
||||||
|
preload = self._search_regex(
|
||||||
|
r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
|
||||||
|
if preload:
|
||||||
|
preload_data = self._parse_json(preload, video_id)
|
||||||
|
path = compat_urllib_parse_urlparse(url).path.rstrip('/')
|
||||||
|
entity_id = preload_data.get('xref', {}).get(path)
|
||||||
|
video_data = preload_data.get('entities', {}).get(entity_id)
|
||||||
|
if video_data:
|
||||||
|
query = {
|
||||||
|
'mbr': 'true',
|
||||||
|
'manifest': 'm3u',
|
||||||
|
}
|
||||||
|
video_id = video_data['guid']
|
||||||
|
title = video_data['title']
|
||||||
|
if video_data.get('entitlement') == 'auth':
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
'nbcentertainment', title, video_id,
|
||||||
|
video_data.get('vChipRating'))
|
||||||
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
url, video_id, 'nbcentertainment', resource)
|
||||||
|
theplatform_url = smuggle_url(update_url_query(
|
||||||
|
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
|
||||||
|
query), {'force_smil_url': True})
|
||||||
|
info.update({
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': theplatform_url,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'keywords': video_data.get('keywords'),
|
||||||
|
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||||
|
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||||
|
'series': video_data.get('showName'),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
|
||||||
|
[
|
||||||
|
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
|
||||||
|
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
|
||||||
|
r'"embedURL"\s*:\s*"([^"]+)"'
|
||||||
|
],
|
||||||
|
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||||
|
if theplatform_url.startswith('//'):
|
||||||
|
theplatform_url = 'http:' + theplatform_url
|
||||||
|
info['url'] = smuggle_url(theplatform_url, {'source_url': url})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsVPlayerIE(InfoExtractor):
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
|
|||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||||
|
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||||
|
if BLOCKED in response:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login: %s' % BLOCKED, expected=True)
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
|
||||||
@ -327,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
|
|||||||
# TODO: PSM cookie
|
# TODO: PSM cookie
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._download_json(
|
||||||
'%s/data/course/%s' % (self._API_BASE, course_id),
|
'%s/player/functions/rpc' % self._API_BASE, course_id,
|
||||||
course_id, 'Downloading course JSON')
|
'Downloading course JSON',
|
||||||
|
data=json.dumps({
|
||||||
|
'fn': 'bootstrapPlayer',
|
||||||
|
'payload': {
|
||||||
|
'courseId': course_id,
|
||||||
|
}
|
||||||
|
}).encode('utf-8'),
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/json;charset=utf-8'
|
||||||
|
})['payload']['course']
|
||||||
|
|
||||||
title = course['title']
|
title = course['title']
|
||||||
|
course_name = course['name']
|
||||||
|
course_data = course['modules']
|
||||||
description = course.get('description') or course.get('shortDescription')
|
description = course.get('description') or course.get('shortDescription')
|
||||||
|
|
||||||
course_data = self._download_json(
|
|
||||||
'%s/data/course/content/%s' % (self._API_BASE, course_id),
|
|
||||||
course_id, 'Downloading course data JSON')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for num, module in enumerate(course_data, 1):
|
for num, module in enumerate(course_data, 1):
|
||||||
|
author = module.get('author')
|
||||||
|
module_name = module.get('name')
|
||||||
|
if not author or not module_name:
|
||||||
|
continue
|
||||||
for clip in module.get('clips', []):
|
for clip in module.get('clips', []):
|
||||||
player_parameters = clip.get('playerParameters')
|
clip_index = int_or_none(clip.get('index'))
|
||||||
if not player_parameters:
|
if clip_index is None:
|
||||||
continue
|
continue
|
||||||
|
clip_url = update_url_query(
|
||||||
|
'%s/player' % self._API_BASE, query={
|
||||||
|
'mode': 'live',
|
||||||
|
'course': course_name,
|
||||||
|
'author': author,
|
||||||
|
'name': module_name,
|
||||||
|
'clip': clip_index,
|
||||||
|
})
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
|
'url': clip_url,
|
||||||
'ie_key': PluralsightIE.ie_key(),
|
'ie_key': PluralsightIE.ie_key(),
|
||||||
'chapter': module.get('title'),
|
'chapter': module.get('title'),
|
||||||
'chapter_number': num,
|
'chapter_number': num,
|
||||||
|
@ -156,7 +156,18 @@ class PornHubIE(InfoExtractor):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
|
video_variables = {}
|
||||||
|
for video_variablename, quote, video_variable in re.findall(
|
||||||
|
r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
|
||||||
|
video_variables[video_variablename] = video_variable
|
||||||
|
|
||||||
|
video_urls = []
|
||||||
|
for encoded_video_url in re.findall(
|
||||||
|
r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
|
||||||
|
for varname, varval in video_variables.items():
|
||||||
|
encoded_video_url = encoded_video_url.replace(varname, varval)
|
||||||
|
video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
|
||||||
|
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
password = compat_urllib_parse_unquote_plus(
|
password = compat_urllib_parse_unquote_plus(
|
||||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||||
|
60
youtube_dl/extractor/scrippsnetworks.py
Normal file
60
youtube_dl/extractor/scrippsnetworks.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .adobepass import AdobePassIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
smuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ScrippsNetworksWatchIE(AdobePassIE):
    """Extractor for the Scripps Networks "watch" player pages.

    Handles the hgtv, foodnetwork, travelchannel, diynetwork and
    cookingchanneltv ``watch.*`` hosts.  The player page embeds a JSON
    ``channels`` array; the entry of the first channel whose ``nlvid`` equals
    the id in the URL fragment is selected and delegated to the ThePlatform
    extractor through its ``releaseUrl``.
    """

    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
    _TEST = {
        'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
        'md5': '26545fd676d939954c6808274bdb905a',
        'info_dict': {
            'id': '0256538',
            'ext': 'mp4',
            'title': 'Seeking a Wow House',
            'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
            'uploader': 'SCNI',
            'upload_date': '20170207',
            'timestamp': 1486450493,
        },
        'skip': 'requires TV provider authentication',
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at ThePlatform.

        Raises ``StopIteration`` when no video of the first channel carries
        the requested ``nlvid`` and ``KeyError`` when ``title`` or
        ``releaseUrl`` is missing from the matched entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Only the first channel of the embedded list is inspected.
        channel = self._parse_json(self._search_regex(
            r'"channels"\s*:\s*(\[.+\])',
            webpage, 'channels'), video_id)[0]
        video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
        title = video_data['title']
        release_url = video_data['releaseUrl']
        if video_data.get('restricted'):
            # Restricted videos need an MVPD (TV provider) auth token that is
            # appended to the release URL as the ``auth`` query parameter.
            requestor_id = self._search_regex(
                r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
            # 'ratings' may be absent, null or an empty list.  dict.get()'s
            # default only covers the "absent" case, so fall back explicitly
            # to avoid IndexError/TypeError and pass rating=None instead.
            ratings = video_data.get('ratings') or [{}]
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, ratings[0].get('rating'))
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
            release_url = update_url_query(release_url, {'auth': auth})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': smuggle_url(release_url, {'force_smil_url': True}),
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'series': video_data.get('showTitle'),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episodeNumber')),
            'ie_key': 'ThePlatform',
        }
|
@ -1,64 +1,101 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
qualities,
|
|
||||||
int_or_none,
|
|
||||||
mimetype2ext,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
qualities,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SixPlayIE(InfoExtractor):
|
class SixPlayIE(InfoExtractor):
|
||||||
|
IE_NAME = '6play'
|
||||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
|
'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
|
||||||
'md5': '42310bffe4ba3982db112b9cd3467328',
|
'md5': '42310bffe4ba3982db112b9cd3467328',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '11495320',
|
'id': '11638450',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Jamel et ses amis au Marrakech du rire 2015',
|
'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
|
||||||
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
|
'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
|
||||||
|
'duration': 39,
|
||||||
|
'series': 'Le meilleur pâtissier',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
clip_data = self._download_json(
|
|
||||||
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
|
|
||||||
video_id)
|
|
||||||
video_data = clip_data['videoInfo']
|
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
|
||||||
|
video_id, query={
|
||||||
|
'csa': 5,
|
||||||
|
'with': 'clips',
|
||||||
|
})
|
||||||
|
|
||||||
|
clip_data = data['clips'][0]
|
||||||
|
title = clip_data['title']
|
||||||
|
|
||||||
|
urls = []
|
||||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||||
formats = []
|
formats = []
|
||||||
for source in clip_data['sources']:
|
for asset in clip_data['assets']:
|
||||||
source_type, source_url = source.get('type'), source.get('src')
|
asset_url = asset.get('full_physical_path')
|
||||||
if not source_url or source_type == 'hls/primetime':
|
protocol = asset.get('protocol')
|
||||||
|
if not asset_url or protocol == 'primetime' or asset_url in urls:
|
||||||
continue
|
continue
|
||||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
urls.append(asset_url)
|
||||||
if ext == 'm3u8':
|
container = asset.get('video_container')
|
||||||
formats.extend(self._extract_m3u8_formats(
|
ext = determine_ext(asset_url)
|
||||||
source_url, video_id, 'mp4', 'm3u8_native',
|
if container == 'm3u8' or ext == 'm3u8':
|
||||||
m3u8_id='hls', fatal=False))
|
if protocol == 'usp':
|
||||||
formats.extend(self._extract_f4m_formats(
|
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||||
source_url.replace('.m3u8', '.f4m'),
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_id, f4m_id='hds', fatal=False))
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
elif ext == 'mp4':
|
m3u8_id='hls', fatal=False))
|
||||||
quality = source.get('quality')
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
asset_url.replace('.m3u8', '.f4m'),
|
||||||
|
video_id, f4m_id='hds', fatal=False))
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
asset_url.replace('.m3u8', '.mpd'),
|
||||||
|
video_id, mpd_id='dash', fatal=False))
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||||
|
video_id, ism_id='mss', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif container == 'mp4' or ext == 'mp4':
|
||||||
|
quality = asset.get('video_quality')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': source_url,
|
'url': asset_url,
|
||||||
'format_id': quality,
|
'format_id': quality,
|
||||||
'quality': quality_key(quality),
|
'quality': quality_key(quality),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
def get(getter):
|
||||||
|
for src in (data, clip_data):
|
||||||
|
v = try_get(src, getter, compat_str)
|
||||||
|
if v:
|
||||||
|
return v
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_data['title'].strip(),
|
'title': title,
|
||||||
'description': video_data.get('description'),
|
'description': get(lambda x: x['description']),
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
'duration': int_or_none(clip_data.get('duration')),
|
||||||
'series': video_data.get('titlePgm'),
|
'series': get(lambda x: x['program']['title']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..utils import js_to_json
|
||||||
from ..utils import (
|
|
||||||
js_to_json,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SportBoxEmbedIE(InfoExtractor):
|
class SportBoxEmbedIE(InfoExtractor):
|
||||||
|
52
youtube_dl/extractor/sprout.py
Normal file
52
youtube_dl/extractor/sprout.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .adobepass import AdobePassIE
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
update_url_query,
|
||||||
|
smuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SproutIE(AdobePassIE):
    """Extractor for sproutonline.com ``/watch/<slug>`` pages.

    The page either carries a ``data-component="video"`` div whose
    ``data-options`` attribute holds JSON player options, or a legacy
    ``sproutVideoIframe`` iframe.  In both cases the resulting URL is handed
    to the ThePlatform extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
        'md5': '74bf14128578d1e040c3ebc82088f45f',
        'info_dict': {
            'id': '9dexnwtmh8_X',
            'ext': 'mp4',
            'title': 'A Cowboy Adventure',
            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
            'timestamp': 1437758640,
            'upload_date': '20150724',
            'uploader': 'NBCU-SPROUT-NEW',
        }
    }

    def _real_extract(self, url):
        """Resolve the page to a ThePlatform URL result."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        component_tag = self._search_regex(
            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
            page, 'video component', default=None)

        if not component_tag:
            # Fallback layout: the player is embedded through an iframe whose
            # src is used as is.
            iframe_tag = self._search_regex(
                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
                page, 'iframe')
            return self.url_result(
                extract_attributes(iframe_tag)['src'], 'ThePlatform')

        player_options = self._parse_json(
            extract_attributes(component_tag)['data-options'], slug)
        media_url = player_options['video']
        params = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        if player_options.get('protected'):
            # Protected videos need an MVPD auth token in the query.
            params['auth'] = self._extract_mvpd_auth(
                url, player_options['pid'], 'sprout', 'sprout')

        smuggled_url = smuggle_url(
            update_url_query(media_url, params), {'force_smil_url': True})
        return self.url_result(smuggled_url, 'ThePlatform')
|
@ -306,9 +306,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
|
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||||
|
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -333,7 +334,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
if asset_type in asset_types_query:
|
if asset_type in asset_types_query:
|
||||||
query.update(asset_types_query[asset_type])
|
query.update(asset_types_query[asset_type])
|
||||||
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
|
||||||
smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
|
||||||
formats.extend(cur_formats)
|
formats.extend(cur_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
|
||||||
|
|
||||||
|
75
youtube_dl/extractor/tvplayer.py
Normal file
75
youtube_dl/extractor/tvplayer.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
urlencode_postdata,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TVPlayerIE(InfoExtractor):
    """Extractor for live channels on tvplayer.com (``/watch/<channel>``).

    Stream parameters are scraped from the watch page and posted to the
    ``stream/live`` API, which answers with an HLS manifest URL.
    """

    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict of the live stream behind *url*.

        Raises ``ExtractorError`` (``expected=True``) carrying the API's own
        error message when the stream request fails with an HTTP error; any
        other ``ExtractorError`` is re-raised unchanged.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        current_channel = extract_attributes(self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element'))
        title = current_channel['data-name']

        resource_id = self._search_regex(
            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
        platform = self._search_regex(
            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
        # token and validate are optional on the page; the literal string
        # 'null' is sent when they are not found.
        token = self._search_regex(
            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
        validate = self._search_regex(
            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')

        try:
            response = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                resource_id, headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                }, data=urlencode_postdata({
                    'service': 1,
                    'platform': platform,
                    'id': resource_id,
                    'token': token,
                    'validate': validate,
                }))['tvplayer']['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                # The HTTP error body is parsed as JSON with the same
                # envelope.  Decode it explicitly as UTF-8: a bare .decode()
                # uses ASCII on Python 2 and would fail on non-ASCII text.
                response = self._parse_json(
                    e.cause.read().decode('utf-8'), resource_id)['tvplayer']['response']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
            raise

        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': resource_id,
            'display_id': display_id,
            'title': self._live_title(title),
            'formats': formats,
            'is_live': True,
        }
|
99
youtube_dl/extractor/videopress.py
Normal file
99
youtube_dl/extractor/videopress.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
parse_age_limit,
|
||||||
|
qualities,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VideoPressIE(InfoExtractor):
    """Extractor for videopress.com embeds (``/embed/<id>``).

    Metadata and file paths come from the public WordPress REST API.
    """

    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://videopress.com/embed/kUJmAcSf',
        'md5': '706956a6c875873d51010921310e4bc6',
        'info_dict': {
            'id': 'kUJmAcSf',
            'ext': 'mp4',
            'title': 'VideoPress Demo',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 634.6,
            'timestamp': 1434983935,
            'upload_date': '20150622',
            'age_limit': 0,
        },
    }, {
        # 17+, requires birth_* params
        'url': 'https://videopress.com/embed/iH3gstfZ',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every VideoPress embed URL found in iframe ``src`` attributes."""
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
            webpage)

    def _real_extract(self, url):
        """Return the info dict (formats and metadata) for a VideoPress embed."""
        video_id = self._match_id(url)

        # A random birth date is sent so that age-restricted videos are
        # served.  The day is capped at 28 because it is drawn independently
        # of the month; 1..31 could produce impossible dates (e.g. Feb 31).
        video = self._download_json(
            'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
            video_id, query={
                'birth_month': random.randint(1, 12),
                'birth_day': random.randint(1, 28),
                'birth_year': random.randint(1950, 1995),
            })

        title = video['title']

        def get_base_url(scheme):
            # None unless file_url_base[scheme] exists and is a string.
            return try_get(
                video, lambda x: x['file_url_base'][scheme], compat_str)

        # Prefer the https base, fall back to http.
        base_url = get_base_url('https') or get_base_url('http')

        QUALITIES = ('std', 'dvd', 'hd')
        quality = qualities(QUALITIES)

        formats = []
        for format_id, f in video['files'].items():
            # Entries that are not dicts carry no per-extension paths.
            if not isinstance(f, dict):
                continue
            for ext, path in f.items():
                if ext in ('mp4', 'ogg'):
                    formats.append({
                        'url': urljoin(base_url, path),
                        'format_id': '%s-%s' % (format_id, ext),
                        'ext': determine_ext(path, ext),
                        'quality': quality(format_id),
                    })
        original_url = try_get(video, lambda x: x['original'], compat_str)
        if original_url:
            # The original upload gets a quality value one past the last
            # index of QUALITIES.
            formats.append({
                'url': original_url,
                'format_id': 'original',
                'quality': len(QUALITIES),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('poster'),
            # NOTE(review): presumably the API reports milliseconds, hence
            # the 1000 passed to float_or_none - confirm.
            'duration': float_or_none(video.get('duration'), 1000),
            'timestamp': unified_timestamp(video.get('upload_date')),
            'age_limit': parse_age_limit(video.get('rating')),
            'formats': formats,
        }
|
@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'xtube:625837',
|
'url': 'xtube:625837',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'xtube:kVTUy_G222_',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -53,14 +56,20 @@ class XTubeIE(InfoExtractor):
|
|||||||
|
|
||||||
if not display_id:
|
if not display_id:
|
||||||
display_id = video_id
|
display_id = video_id
|
||||||
url = 'http://www.xtube.com/watch.php?v=%s' % video_id
|
|
||||||
|
|
||||||
req = sanitized_Request(url)
|
if video_id.isdigit() and len(video_id) < 11:
|
||||||
req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
|
url_pattern = 'http://www.xtube.com/video-watch/-%s'
|
||||||
webpage = self._download_webpage(req, display_id)
|
else:
|
||||||
|
url_pattern = 'http://www.xtube.com/watch.php?v=%s'
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url_pattern % video_id, display_id, headers={
|
||||||
|
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
||||||
|
})
|
||||||
|
|
||||||
sources = self._parse_json(self._search_regex(
|
sources = self._parse_json(self._search_regex(
|
||||||
r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
|
r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
|
||||||
|
webpage, 'sources', group='sources'), video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in sources.items():
|
for format_id, format_url in sources.items():
|
||||||
@ -72,7 +81,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
(r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
|
(r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
|
||||||
webpage, 'title', group='title')
|
webpage, 'title', group='title')
|
||||||
description = self._search_regex(
|
description = self._search_regex(
|
||||||
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
|
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
|
||||||
@ -81,10 +90,10 @@ class XTubeIE(InfoExtractor):
|
|||||||
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
|
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
|
r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
|
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
comment_count = str_to_int(self._html_search_regex(
|
comment_count = str_to_int(self._html_search_regex(
|
||||||
r'>Comments? \(([\d,\.]+)\)<',
|
r'>Comments? \(([\d,\.]+)\)<',
|
||||||
|
@ -34,6 +34,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_codecs,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
remove_start,
|
remove_start,
|
||||||
@ -1696,15 +1697,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
codecs = mobj.group('val')
|
codecs = mobj.group('val')
|
||||||
break
|
break
|
||||||
if codecs:
|
if codecs:
|
||||||
codecs = codecs.split(',')
|
dct.update(parse_codecs(codecs))
|
||||||
if len(codecs) == 2:
|
|
||||||
acodec, vcodec = codecs[1], codecs[0]
|
|
||||||
else:
|
|
||||||
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
|
|
||||||
dct.update({
|
|
||||||
'acodec': acodec,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
})
|
|
||||||
formats.append(dct)
|
formats.append(dct)
|
||||||
elif video_info.get('hlsvp'):
|
elif video_info.get('hlsvp'):
|
||||||
manifest_url = video_info['hlsvp'][0]
|
manifest_url = video_info['hlsvp'][0]
|
||||||
|
@ -20,9 +20,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ZDFBaseIE(InfoExtractor):
|
class ZDFBaseIE(InfoExtractor):
|
||||||
def _call_api(self, url, player, referrer, video_id):
|
def _call_api(self, url, player, referrer, video_id, item):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
url, video_id, 'Downloading JSON content',
|
url, video_id, 'Downloading JSON %s' % item,
|
||||||
headers={
|
headers={
|
||||||
'Referer': referrer,
|
'Referer': referrer,
|
||||||
'Api-Auth': 'Bearer %s' % player['apiToken'],
|
'Api-Auth': 'Bearer %s' % player['apiToken'],
|
||||||
@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
def _extract_entry(self, url, content, video_id):
|
def _extract_entry(self, url, player, content, video_id):
|
||||||
title = content.get('title') or content['teaserHeadline']
|
title = content.get('title') or content['teaserHeadline']
|
||||||
|
|
||||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||||
@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
||||||
'{playerId}', 'portal')
|
'{playerId}', 'portal')
|
||||||
|
|
||||||
ptmd = self._download_json(urljoin(url, ptmd_path), video_id)
|
ptmd = self._call_api(
|
||||||
|
urljoin(url, ptmd_path), player, url, video_id, 'metadata')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
track_uris = set()
|
track_uris = set()
|
||||||
@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_regular(self, url, player, video_id):
|
def _extract_regular(self, url, player, video_id):
|
||||||
content = self._call_api(player['content'], player, url, video_id)
|
content = self._call_api(
|
||||||
return self._extract_entry(player['content'], content, video_id)
|
player['content'], player, url, video_id, 'content')
|
||||||
|
return self._extract_entry(player['content'], player, content, video_id)
|
||||||
|
|
||||||
def _extract_mobile(self, video_id):
|
def _extract_mobile(self, video_id):
|
||||||
document = self._download_json(
|
document = self._download_json(
|
||||||
|
@ -337,17 +337,30 @@ def get_element_by_id(id, html):
|
|||||||
|
|
||||||
|
|
||||||
def get_element_by_class(class_name, html):
|
def get_element_by_class(class_name, html):
|
||||||
return get_element_by_attribute(
|
"""Return the content of the first tag with the specified class in the passed HTML document"""
|
||||||
|
retval = get_elements_by_class(class_name, html)
|
||||||
|
return retval[0] if retval else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
||||||
|
retval = get_elements_by_attribute(attribute, value, html, escape_value)
|
||||||
|
return retval[0] if retval else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_elements_by_class(class_name, html):
|
||||||
|
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
|
||||||
|
return get_elements_by_attribute(
|
||||||
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
|
||||||
html, escape_value=False)
|
html, escape_value=False)
|
||||||
|
|
||||||
|
|
||||||
def get_element_by_attribute(attribute, value, html, escape_value=True):
|
def get_elements_by_attribute(attribute, value, html, escape_value=True):
|
||||||
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
"""Return the content of the tag with the specified attribute in the passed HTML document"""
|
||||||
|
|
||||||
value = re.escape(value) if escape_value else value
|
value = re.escape(value) if escape_value else value
|
||||||
|
|
||||||
m = re.search(r'''(?xs)
|
retlist = []
|
||||||
|
for m in re.finditer(r'''(?xs)
|
||||||
<([a-zA-Z0-9:._-]+)
|
<([a-zA-Z0-9:._-]+)
|
||||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
|
||||||
\s+%s=['"]?%s['"]?
|
\s+%s=['"]?%s['"]?
|
||||||
@ -355,16 +368,15 @@ def get_element_by_attribute(attribute, value, html, escape_value=True):
|
|||||||
\s*>
|
\s*>
|
||||||
(?P<content>.*?)
|
(?P<content>.*?)
|
||||||
</\1>
|
</\1>
|
||||||
''' % (re.escape(attribute), value), html)
|
''' % (re.escape(attribute), value), html):
|
||||||
|
res = m.group('content')
|
||||||
|
|
||||||
if not m:
|
if res.startswith('"') or res.startswith("'"):
|
||||||
return None
|
res = res[1:-1]
|
||||||
res = m.group('content')
|
|
||||||
|
|
||||||
if res.startswith('"') or res.startswith("'"):
|
retlist.append(unescapeHTML(res))
|
||||||
res = res[1:-1]
|
|
||||||
|
|
||||||
return unescapeHTML(res)
|
return retlist
|
||||||
|
|
||||||
|
|
||||||
class HTMLAttributeParser(compat_HTMLParser):
|
class HTMLAttributeParser(compat_HTMLParser):
|
||||||
@ -1672,6 +1684,11 @@ def setproctitle(title):
|
|||||||
libc = ctypes.cdll.LoadLibrary('libc.so.6')
|
libc = ctypes.cdll.LoadLibrary('libc.so.6')
|
||||||
except OSError:
|
except OSError:
|
||||||
return
|
return
|
||||||
|
except TypeError:
|
||||||
|
# LoadLibrary in Windows Python 2.7.13 only expects
|
||||||
|
# a bytestring, but since unicode_literals turns
|
||||||
|
# every string into a unicode string, it fails.
|
||||||
|
return
|
||||||
title_bytes = title.encode('utf-8')
|
title_bytes = title.encode('utf-8')
|
||||||
buf = ctypes.create_string_buffer(len(title_bytes))
|
buf = ctypes.create_string_buffer(len(title_bytes))
|
||||||
buf.value = title_bytes
|
buf.value = title_bytes
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.02.04.1'
|
__version__ = '2017.02.14'
|
||||||
|
Reference in New Issue
Block a user