Compare commits
396 Commits
2015.10.16
...
2015.11.24
Author | SHA1 | Date | |
---|---|---|---|
|
ba7a92b0ce | ||
|
4c7d816dd7 | ||
|
032f2f260f | ||
|
20e98bf6c0 | ||
|
5c2266df4b | ||
|
67dda51722 | ||
|
e4c4bcf36f | ||
|
82d8a8b6e2 | ||
|
13a10d5aa3 | ||
|
9022726446 | ||
|
94bfcd23b7 | ||
|
526b3b0716 | ||
|
61f92af1cf | ||
|
a72778d364 | ||
|
5ae17037a3 | ||
|
02f0da20b0 | ||
|
b41631c4e6 | ||
|
0e49d9a6b0 | ||
|
4a7d108ab3 | ||
|
3cfd000849 | ||
|
1b38185361 | ||
|
9cb9a5df77 | ||
|
5035536e3f | ||
|
3e12bc583a | ||
|
e568c2233e | ||
|
061a75edd6 | ||
|
82c4d7b0ce | ||
|
136dadde95 | ||
|
0c14841585 | ||
|
0eebf34d9d | ||
|
cf186b77a7 | ||
|
a3372437bf | ||
|
4c57b4853d | ||
|
38eb2968ab | ||
|
bea56c9569 | ||
|
7e508ff2cf | ||
|
563772eda4 | ||
|
0533915aad | ||
|
c3a227d1c4 | ||
|
f6c903e708 | ||
|
7dc011c063 | ||
|
4e3b303016 | ||
|
7e1f5447e7 | ||
|
7e3472758b | ||
|
328a22e175 | ||
|
417b453699 | ||
|
6ea7190a3e | ||
|
b54b08c91b | ||
|
c30943b1c0 | ||
|
2abf7cab80 | ||
|
4137196899 | ||
|
019839faaa | ||
|
f52183a878 | ||
|
750b9ff032 | ||
|
28602e747c | ||
|
6cc37c69e2 | ||
|
a5cd0eb8a4 | ||
|
c23e266427 | ||
|
651acffbe5 | ||
|
71bd93b89c | ||
|
6da620de58 | ||
|
bdceea7afd | ||
|
d80a39cec8 | ||
|
5b5fae5f20 | ||
|
01b06aedcf | ||
|
c711383811 | ||
|
17cc153435 | ||
|
67446fd49b | ||
|
325bb615a7 | ||
|
ee5cd8418e | ||
|
342609a1b4 | ||
|
f270cf1a26 | ||
|
371c3b796c | ||
|
6b7ceee1b9 | ||
|
fdb20a27a3 | ||
|
2c94198eb6 | ||
|
e8110b8125 | ||
|
c39fd7b1ca | ||
|
a9c09a7c62 | ||
|
82beaabb41 | ||
|
63b4295d20 | ||
|
312a3f389b | ||
|
609af1ae1c | ||
|
4cd759f73d | ||
|
e156e70281 | ||
|
9b464929fe | ||
|
0c176d7bde | ||
|
7a3f0c00ad | ||
|
7aefc49c40 | ||
|
741dd8ea65 | ||
|
76adc82068 | ||
|
bd1512d196 | ||
|
9a4acbfaf5 | ||
|
ad1f4e7902 | ||
|
b328295910 | ||
|
828b2a5cd9 | ||
|
2ff7cbeaaa | ||
|
b2f7738830 | ||
|
dc0279532a | ||
|
0c59d02bdc | ||
|
0f72beb515 | ||
|
d781e29316 | ||
|
3b3e8ed332 | ||
|
dcdfeb33d2 | ||
|
0d85c3a732 | ||
|
903d136942 | ||
|
9d584da7d0 | ||
|
31752f76f7 | ||
|
5f1b2aea80 | ||
|
4479600d57 | ||
|
a90189c3ad | ||
|
d8a1caf04f | ||
|
cb33d389ed | ||
|
967e0955f0 | ||
|
e01b432ad3 | ||
|
fd91257c40 | ||
|
c7b959ce38 | ||
|
75eac8961e | ||
|
3b7d9aa487 | ||
|
1f4b722b00 | ||
|
f6519f89b0 | ||
|
24af85298e | ||
|
e721d857c2 | ||
|
5c17f0a67a | ||
|
4fcaa4f4a5 | ||
|
536f819eda | ||
|
a662489877 | ||
|
a2973eb597 | ||
|
4e21b3a94f | ||
|
b703ebeeaf | ||
|
b84a5f0337 | ||
|
a1ec9a7553 | ||
|
91d644b5ba | ||
|
5d6c3d6a66 | ||
|
1ebb4717df | ||
|
cf5881fc4d | ||
|
fcd817a326 | ||
|
031ec536f0 | ||
|
668db403f9 | ||
|
b9ad101926 | ||
|
435911029f | ||
|
699ed30cee | ||
|
9eab37dca0 | ||
|
9a8a12b7d8 | ||
|
a4c2ab35c1 | ||
|
3d9c4bf09a | ||
|
8b8a39e279 | ||
|
82393e2bb2 | ||
|
2eb99a4b98 | ||
|
6abce58a12 | ||
|
990e6e8fa3 | ||
|
bfd88516eb | ||
|
d8b7e80d29 | ||
|
37120974dc | ||
|
42fc93c709 | ||
|
a625e56543 | ||
|
9b738b2caa | ||
|
90bb5667bf | ||
|
d3d3e2e3aa | ||
|
37ca7b22b5 | ||
|
50f84a9ae1 | ||
|
ff29bf81f8 | ||
|
b25f753397 | ||
|
6a5d6de1e3 | ||
|
1c31a5b0e0 | ||
|
4f5cdf7c9b | ||
|
f09a767d31 | ||
|
cc8034cc4c | ||
|
50506cb607 | ||
|
aa8d2d5be6 | ||
|
114e6025b0 | ||
|
fda2717ef9 | ||
|
937511dfc0 | ||
|
d5c181a14e | ||
|
e8ce2375e0 | ||
|
6fdb39ded1 | ||
|
8e3a2bd620 | ||
|
a06bf87a2c | ||
|
ee4337d100 | ||
|
cff551c0b0 | ||
|
6d02b9a392 | ||
|
2c740cf28d | ||
|
5214f1e31d | ||
|
5d0f84d32c | ||
|
ee223abb88 | ||
|
21d0c33ecd | ||
|
8b6d9406db | ||
|
686f98816e | ||
|
0fa6b17dcc | ||
|
472404953a | ||
|
ae4ddf9efa | ||
|
ea8ed40b2f | ||
|
71bb016160 | ||
|
179ffab69c | ||
|
deb85c32bb | ||
|
92366d189e | ||
|
81413c0165 | ||
|
1e2eb4b40d | ||
|
01003d072c | ||
|
5003e4283b | ||
|
123c781044 | ||
|
e68dd1921a | ||
|
6953d8e95a | ||
|
b3613d36da | ||
|
53472df857 | ||
|
2549e113b8 | ||
|
b15c44cd36 | ||
|
f93ded9852 | ||
|
89ea063eeb | ||
|
44b2264fea | ||
|
cb5a470635 | ||
|
17d1900581 | ||
|
5d501a0901 | ||
|
c13722480b | ||
|
e7d34c03f2 | ||
|
264cd00fff | ||
|
a4a6b7b80f | ||
|
aebb42d32b | ||
|
b4ef6a0038 | ||
|
5d235ca7f6 | ||
|
c3459d24f1 | ||
|
e3778cce0e | ||
|
ad607563a2 | ||
|
236cb2131b | ||
|
66d041f250 | ||
|
f3cb54e6d9 | ||
|
0aeb9a106e | ||
|
fd8102820c | ||
|
bfdf891fd3 | ||
|
3fa3ff1bc3 | ||
|
0a0110fc6b | ||
|
852fad922f | ||
|
fc68d52bb9 | ||
|
dde9fe9788 | ||
|
a230068ff7 | ||
|
6a75040278 | ||
|
c514b0ec65 | ||
|
eb97f46e8b | ||
|
c90d16cf36 | ||
|
ab6ca04802 | ||
|
999079b454 | ||
|
8a06999ba0 | ||
|
80dcee5cd5 | ||
|
9550ca506f | ||
|
30eecc6a04 | ||
|
dbd82a1d4f | ||
|
76f0c50d3d | ||
|
dc519b5421 | ||
|
ae12bc3ebb | ||
|
e327b736ca | ||
|
82b69a5cbb | ||
|
11465da702 | ||
|
578c074575 | ||
|
8cdb5c8453 | ||
|
2b1b2d83ca | ||
|
c3040bd00a | ||
|
8c1aa28c27 | ||
|
78d7ee19dc | ||
|
892015b088 | ||
|
47f2d01a5a | ||
|
33a513faf7 | ||
|
6722ebd437 | ||
|
721f5a277c | ||
|
6fb8ace671 | ||
|
ae37338e68 | ||
|
03c2c162f9 | ||
|
52c3a6e49d | ||
|
4e16c1f80b | ||
|
7ccb2b84dd | ||
|
0a192fbea7 | ||
|
a526167d40 | ||
|
f78546272c | ||
|
c137cc0d33 | ||
|
6e4b8b2891 | ||
|
5dadae079b | ||
|
cd08d806b1 | ||
|
5f9f87c06f | ||
|
387db16a78 | ||
|
36e6f62cd0 | ||
|
755ff8d22c | ||
|
7b3a19e533 | ||
|
4f13f8f798 | ||
|
feb7711cf5 | ||
|
589c33dade | ||
|
e572a1010b | ||
|
7e0dc61334 | ||
|
8e82ecfe8f | ||
|
ec29539e06 | ||
|
8cd9614abf | ||
|
324ac0a243 | ||
|
3711304510 | ||
|
50b936936d | ||
|
d97da29da2 | ||
|
7687b354c5 | ||
|
36d7281037 | ||
|
865d1fbafc | ||
|
ac21e71968 | ||
|
943a1e24b8 | ||
|
50f01302d3 | ||
|
0198807ef9 | ||
|
6856139705 | ||
|
c93153852f | ||
|
ab9c7214ee | ||
|
dae69640d0 | ||
|
edeb3e7cb1 | ||
|
5c43afd40f | ||
|
9170ca5b16 | ||
|
65d49afa48 | ||
|
ab03c0b47c | ||
|
7690787553 | ||
|
a65402ef42 | ||
|
7033bc1a51 | ||
|
89d5fbf354 | ||
|
8c3533ba97 | ||
|
44d6dd08b2 | ||
|
cc449417c4 | ||
|
7308b8cb3d | ||
|
4211c83aa4 | ||
|
d01949dc89 | ||
|
63a6494834 | ||
|
8bea039b83 | ||
|
d65889bbc0 | ||
|
4a8963770e | ||
|
5b0aa2c7b1 | ||
|
b6aa99aff8 | ||
|
0be30bafa4 | ||
|
7b091c370c | ||
|
334b5c3b72 | ||
|
b7cedb1604 | ||
|
2038ad6ee7 | ||
|
b243340f0c | ||
|
8cc83d301d | ||
|
d762f86e94 | ||
|
264b23e1a4 | ||
|
a6e0afa2bb | ||
|
4285a47f40 | ||
|
e36963e0eb | ||
|
dedd35c6bc | ||
|
608945d44a | ||
|
b1bf063503 | ||
|
14bddf35fb | ||
|
ef6c868f23 | ||
|
6682049dee | ||
|
b0f001a6cb | ||
|
dd67702a3e | ||
|
05a3879f1c | ||
|
4a7b790384 | ||
|
09ff81316e | ||
|
c88aec845a | ||
|
77a54b6a65 | ||
|
575036b405 | ||
|
f6dfd6603a | ||
|
e04edad621 | ||
|
f322bfb063 | ||
|
014e880372 | ||
|
01d22d4703 | ||
|
48aae2d2cf | ||
|
c571dea953 | ||
|
8b172c2e10 | ||
|
0a67a3632b | ||
|
985e4fdc07 | ||
|
1e399778ee | ||
|
2e022397c4 | ||
|
02835c6bf4 | ||
|
91816e8f16 | ||
|
10c38c7ca2 | ||
|
94a773feb9 | ||
|
448ef1f31c | ||
|
49941c4e4f | ||
|
80f48920c8 | ||
|
5a11b793fe | ||
|
7593fbaa12 | ||
|
2eb0f72a0e | ||
|
8e5b121948 | ||
|
648e6a1ffe | ||
|
583882fdce | ||
|
9eb31b265f | ||
|
ddeb1b3de2 | ||
|
59fe4824f8 | ||
|
dd8417526b | ||
|
09670d5ba4 | ||
|
41a7b00f18 | ||
|
350c948133 | ||
|
e5e9966199 | ||
|
fbd9f6ea80 | ||
|
6df7179e6c | ||
|
36eb802baf | ||
|
c01e1a96aa | ||
|
53407e3f38 | ||
|
ed1269000f | ||
|
689fb748ee | ||
|
9e7e0dffd5 | ||
|
c3dea3f878 | ||
|
f57f84f606 | ||
|
eb08081330 | ||
|
f870544302 |
2
AUTHORS
2
AUTHORS
@@ -144,3 +144,5 @@ Lee Jenkins
|
||||
Anssi Hannula
|
||||
Lukáš Lalinský
|
||||
Qijiang Fan
|
||||
Rémy Léone
|
||||
Marco Ferragina
|
||||
|
@@ -1,6 +1,6 @@
|
||||
**Please include the full output of youtube-dl when run with `-v`**.
|
||||
|
||||
The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
@@ -114,12 +114,13 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
|
15
README.md
15
README.md
@@ -329,8 +329,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub Write subtitle file
|
||||
--write-auto-sub Write automatic subtitle file (YouTube
|
||||
only)
|
||||
--write-auto-sub Write automatically generated subtitle file
|
||||
(YouTube only)
|
||||
--all-subs Download all the available subtitles of the
|
||||
video
|
||||
--list-subs List all available subtitles for the video
|
||||
@@ -534,6 +534,12 @@ Most people asking this question are not aware that youtube-dl now defaults to d
|
||||
|
||||
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
|
||||
|
||||
### Do I need any other programs?
|
||||
|
||||
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
|
||||
|
||||
Some videos or video formats can also be only downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed.
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
@@ -710,12 +716,13 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
@@ -794,7 +801,7 @@ Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/i
|
||||
|
||||
**Please include the full output of youtube-dl when run with `-v`**.
|
||||
|
||||
The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
|
@@ -53,6 +53,7 @@
|
||||
- **Bandcamp:album**
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@@ -66,7 +67,8 @@
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **Break**
|
||||
- **Brightcove**
|
||||
- **brightcove:legacy**
|
||||
- **brightcove:new**
|
||||
- **bt:article**: Bergens Tidende Articles
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BuzzFeed**
|
||||
@@ -92,6 +94,7 @@
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNET**
|
||||
- **CNN**
|
||||
@@ -121,10 +124,12 @@
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Discovery**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
- **dramafever**
|
||||
- **dramafever:series**
|
||||
- **DRBonanza**
|
||||
@@ -193,10 +198,10 @@
|
||||
- **Giga**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
|
||||
- **Goshgay**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
@@ -280,7 +285,7 @@
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
@@ -363,6 +368,7 @@
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **NowTV**
|
||||
- **NowTVList**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
@@ -422,7 +428,6 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **Quickscope**: Quick Scope
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@@ -489,6 +494,7 @@
|
||||
- **soompi:show**
|
||||
- **soundcloud**
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:search**: Soundcloud search
|
||||
- **soundcloud:set**
|
||||
- **soundcloud:user**
|
||||
- **soundgasm**
|
||||
@@ -515,6 +521,7 @@
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@@ -588,7 +595,8 @@
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **TwitterCard**
|
||||
- **twitter**
|
||||
- **twitter:card**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
@@ -613,7 +621,6 @@
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videolectures.net**
|
||||
- **VideoMega**
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube
|
||||
@@ -623,6 +630,7 @@
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
- **viki:channel**
|
||||
- **vimeo**
|
||||
@@ -665,6 +673,7 @@
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XMinus**
|
||||
@@ -699,6 +708,7 @@
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:user:playlists**: YouTube.com user playlists
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
|
2
setup.py
2
setup.py
@@ -28,7 +28,7 @@ py2exe_options = {
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe'],
|
||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
||||
}
|
||||
|
||||
py2exe_console = [{
|
||||
|
@@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||
<meta content='application/x-shockwave-flash' property='og:video:type'>
|
||||
<meta content='Foo' property=og:foobar>
|
||||
<meta name="og:test1" content='foo > < bar'/>
|
||||
<meta name="og:test2" content="foo >//< bar"/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
self.assertEqual(ie._og_search_video_url(html, default=None), None)
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
|
@@ -13,8 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from youtube_dl.utils import get_filesystem_encoding
|
||||
from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_shlex_split,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
@@ -71,5 +73,20 @@ class TestCompat(unittest.TestCase):
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
<root foo="bar" spam="中文">
|
||||
<normal>foo</normal>
|
||||
<chinese>中文</chinese>
|
||||
<foo><bar>spam</bar></foo>
|
||||
</root>
|
||||
'''
|
||||
doc = compat_etree_fromstring(xml.encode('utf-8'))
|
||||
self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
|
||||
self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
|
||||
self.assertTrue(isinstance(doc.find('normal').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -102,7 +102,7 @@ def generator(test_case):
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', True)
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('skip_download', True)
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
|
@@ -19,6 +19,9 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function x3(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x3'), 42)
|
||||
|
||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x5'), 42)
|
||||
|
||||
def test_calc(self):
|
||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
||||
|
@@ -28,6 +28,7 @@ from youtube_dl.extractor import (
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
DemocracynowIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -346,5 +347,25 @@ class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||
IE = DemocracynowIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
|
||||
|
||||
def test_subtitles_in_page(self):
|
||||
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -21,6 +21,7 @@ from youtube_dl.utils import (
|
||||
clean_html,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
@@ -68,6 +69,9 @@ from youtube_dl.utils import (
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
|
||||
|
||||
class TestUtil(unittest.TestCase):
|
||||
@@ -207,8 +211,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unescapeHTML('%20;'), '%20;')
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(
|
||||
unescapeHTML('é'), 'é')
|
||||
self.assertEqual(unescapeHTML('é'), 'é')
|
||||
self.assertEqual(unescapeHTML('�'), '�')
|
||||
|
||||
def test_daterange(self):
|
||||
_20century = DateRange("19000101", "20000101")
|
||||
@@ -233,6 +237,14 @@ class TestUtil(unittest.TestCase):
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -242,7 +254,7 @@ class TestUtil(unittest.TestCase):
|
||||
<node x="b" y="d" />
|
||||
<node x="" />
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
|
||||
@@ -263,7 +275,7 @@ class TestUtil(unittest.TestCase):
|
||||
<url>http://server.com/download.mp3</url>
|
||||
</media:song>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
|
||||
self.assertTrue(find('media:song') is not None)
|
||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||
@@ -275,9 +287,16 @@ class TestUtil(unittest.TestCase):
|
||||
p = xml.etree.ElementTree.SubElement(div, 'p')
|
||||
p.text = 'Foo'
|
||||
self.assertEqual(xpath_element(doc, 'div/p'), p)
|
||||
self.assertEqual(xpath_element(doc, ['div/p']), p)
|
||||
self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
|
||||
self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
|
||||
self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
|
||||
self.assertTrue(xpath_element(doc, 'div/bar') is None)
|
||||
self.assertTrue(xpath_element(doc, ['div/bar']) is None)
|
||||
self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)
|
||||
|
||||
def test_xpath_text(self):
|
||||
testxml = '''<root>
|
||||
@@ -285,7 +304,7 @@ class TestUtil(unittest.TestCase):
|
||||
<p>Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||
self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
|
||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||
@@ -297,7 +316,7 @@ class TestUtil(unittest.TestCase):
|
||||
<p x="a">Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
|
||||
self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
|
||||
self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
|
||||
@@ -425,6 +444,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251)
|
||||
self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None)
|
||||
|
||||
def test_strip_jsonp(self):
|
||||
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
|
||||
@@ -495,6 +516,9 @@ class TestUtil(unittest.TestCase):
|
||||
"playlist":[{"controls":{"all":null}}]
|
||||
}''')
|
||||
|
||||
inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"'''
|
||||
self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''')
|
||||
|
||||
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||
json_code = js_to_json(inp)
|
||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||
|
@@ -57,5 +57,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_flat_playlist_titles(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['extract_flat'] = True
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -28,6 +28,7 @@ if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_get_terminal_size,
|
||||
@@ -37,6 +38,7 @@ from .compat import (
|
||||
compat_tokenize_tokenize,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_urllib_request_DataHandler,
|
||||
)
|
||||
from .utils import (
|
||||
ContentTooShortError,
|
||||
@@ -62,6 +64,7 @@ from .utils import (
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
UnavailableVideoError,
|
||||
@@ -155,7 +158,7 @@ class YoutubeDL(object):
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
write_all_thumbnails: Write all thumbnail formats to files
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatic subtitles to a file
|
||||
writeautomaticsub: Write the automatically generated subtitles to a file
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
(requires writesubtitles or writeautomaticsub)
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
@@ -571,7 +574,7 @@ class YoutubeDL(object):
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
@@ -579,7 +582,7 @@ class YoutubeDL(object):
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return filename
|
||||
return sanitize_path(filename)
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
@@ -832,6 +835,7 @@ class YoutubeDL(object):
|
||||
extra_info=extra)
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
elif result_type == 'compat_list':
|
||||
self.report_warning(
|
||||
@@ -936,7 +940,7 @@ class YoutubeDL(object):
|
||||
filter_parts.append(string)
|
||||
|
||||
def _remove_unused_ops(tokens):
|
||||
# Remove operators that we don't use and join them with the sourrounding strings
|
||||
# Remove operators that we don't use and join them with the surrounding strings
|
||||
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
|
||||
ALLOWED_OPS = ('/', '+', ',', '(', ')')
|
||||
last_string, last_start, last_end, last_line = None, None, None, None
|
||||
@@ -1185,7 +1189,7 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
|
||||
pr = compat_urllib_request.Request(info_dict['url'])
|
||||
pr = sanitized_Request(info_dict['url'])
|
||||
self.cookiejar.add_cookie_header(pr)
|
||||
return pr.get_header('Cookie')
|
||||
|
||||
@@ -1869,6 +1873,8 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
if isinstance(req, compat_basestring):
|
||||
req = sanitized_Request(req)
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
@@ -1967,8 +1973,9 @@ class YoutubeDL(object):
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
data_handler = compat_urllib_request_DataHandler()
|
||||
opener = compat_urllib_request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
|
@@ -377,7 +377,7 @@ def _real_main(argv=None):
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
update_self(ydl.to_screen, opts.verbose, ydl._opener)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import collections
|
||||
import email
|
||||
import getpass
|
||||
import io
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
@@ -11,6 +14,7 @@ import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
import xml.etree.ElementTree
|
||||
|
||||
|
||||
try:
|
||||
@@ -38,6 +42,11 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import urlparse as compat_urlparse
|
||||
|
||||
try:
|
||||
import urllib.response as compat_urllib_response
|
||||
except ImportError: # Python 2
|
||||
import urllib as compat_urllib_response
|
||||
|
||||
try:
|
||||
import http.cookiejar as compat_cookiejar
|
||||
except ImportError: # Python 2
|
||||
@@ -155,6 +164,40 @@ except ImportError: # Python 2
|
||||
string = string.replace('+', ' ')
|
||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||
|
||||
try:
|
||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||
except ImportError: # Python < 3.4
|
||||
# Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
|
||||
class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
|
||||
def data_open(self, req):
|
||||
# data URLs as specified in RFC 2397.
|
||||
#
|
||||
# ignores POSTed data
|
||||
#
|
||||
# syntax:
|
||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
||||
# data := *urlchar
|
||||
# parameter := attribute "=" value
|
||||
url = req.get_full_url()
|
||||
|
||||
scheme, data = url.split(":", 1)
|
||||
mediatype, data = data.split(",", 1)
|
||||
|
||||
# even base64 encoded data URLs might be quoted so unquote in any case:
|
||||
data = compat_urllib_parse_unquote_to_bytes(data)
|
||||
if mediatype.endswith(";base64"):
|
||||
data = binascii.a2b_base64(data)
|
||||
mediatype = mediatype[:-7]
|
||||
|
||||
if not mediatype:
|
||||
mediatype = "text/plain;charset=US-ASCII"
|
||||
|
||||
headers = email.message_from_string(
|
||||
"Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
@@ -170,6 +213,43 @@ try:
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
compat_etree_fromstring = xml.etree.ElementTree.fromstring
|
||||
else:
|
||||
# python 2.x tries to encode unicode strings with ascii (see the
|
||||
# XMLParser._fixtext method)
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
try:
|
||||
_etree_iter = etree.Element.iter
|
||||
except AttributeError: # Python <=2.6
|
||||
def _etree_iter(root):
|
||||
for el in root.findall('*'):
|
||||
yield el
|
||||
for sub in _etree_iter(el):
|
||||
yield sub
|
||||
|
||||
# on 2.6 XML doesn't have a parser argument, function copied from CPython
|
||||
# 2.7 source
|
||||
def _XML(text, parser=None):
|
||||
if not parser:
|
||||
parser = etree.XMLParser(target=etree.TreeBuilder())
|
||||
parser.feed(text)
|
||||
return parser.close()
|
||||
|
||||
def _element_factory(*args, **kwargs):
|
||||
el = etree.Element(*args, **kwargs)
|
||||
for k, v in el.items():
|
||||
if isinstance(v, bytes):
|
||||
el.set(k, v.decode('utf-8'))
|
||||
return el
|
||||
|
||||
def compat_etree_fromstring(text):
|
||||
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
|
||||
for el in _etree_iter(doc):
|
||||
if el.text is not None and isinstance(el.text, bytes):
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
@@ -465,6 +545,7 @@ __all__ = [
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_etree_fromstring',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
@@ -489,6 +570,8 @@ __all__ = [
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urllib_request',
|
||||
'compat_urllib_request_DataHandler',
|
||||
'compat_urllib_response',
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
|
@@ -42,7 +42,7 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
(experimenatal)
|
||||
(experimental)
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class DashSegmentsFD(FileDownloader):
|
||||
@@ -22,7 +22,7 @@ class DashSegmentsFD(FileDownloader):
|
||||
|
||||
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
|
||||
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
|
||||
req = compat_urllib_request.Request(target_url)
|
||||
req = sanitized_Request(target_url)
|
||||
if remaining_bytes is not None:
|
||||
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||
|
||||
|
@@ -5,12 +5,13 @@ import io
|
||||
import itertools
|
||||
import os
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
@@ -285,9 +286,11 @@ class F4mFD(FragmentFD):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
urlh = self.ydl.urlopen(man_url)
|
||||
man_url = urlh.geturl()
|
||||
manifest = urlh.read()
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
@@ -329,20 +332,25 @@ class F4mFD(FragmentFD):
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
base_url_parsed = compat_urllib_parse_urlparse(base_url)
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
url = base_url + name
|
||||
query = []
|
||||
if base_url_parsed.query:
|
||||
query.append(base_url_parsed.query)
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
query.append(akamai_pv.strip(';'))
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
url += info_dict.get('extra_param_to_segment_url')
|
||||
query.append(info_dict['extra_param_to_segment_url'])
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
|
@@ -30,7 +30,7 @@ class HlsFD(FileDownloader):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
if info_dict['http_headers']:
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args += [
|
||||
|
@@ -7,14 +7,12 @@ import time
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,8 +27,8 @@ class HttpFD(FileDownloader):
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
basic_request = sanitized_Request(url, None, headers)
|
||||
request = sanitized_Request(url, None, headers)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
|
@@ -117,7 +117,7 @@ class RtmpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# the connection was interrupted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = [
|
||||
'rtmpdump', '--verbose', '-r', url,
|
||||
|
@@ -45,6 +45,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
@@ -59,7 +60,10 @@ from .bloomberg import BloombergIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
@@ -89,6 +93,7 @@ from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
@@ -122,10 +127,12 @@ from .dbtv import DBTVIE
|
||||
from .dcn import DCNIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dplay import DPlayIE
|
||||
from .dramafever import (
|
||||
DramaFeverIE,
|
||||
DramaFeverSeriesIE,
|
||||
@@ -209,13 +216,15 @@ from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
@@ -412,7 +421,10 @@ from .nowness import (
|
||||
NownessPlaylistIE,
|
||||
NownessSeriesIE,
|
||||
)
|
||||
from .nowtv import NowTVIE
|
||||
from .nowtv import (
|
||||
NowTVIE,
|
||||
NowTVListIE,
|
||||
)
|
||||
from .nowvideo import NowVideoIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
@@ -450,10 +462,7 @@ from .orf import (
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .periscope import (
|
||||
PeriscopeIE,
|
||||
QuickscopeIE,
|
||||
)
|
||||
from .periscope import PeriscopeIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
@@ -567,7 +576,8 @@ from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE
|
||||
SoundcloudPlaylistIE,
|
||||
SoundcloudSearchIE
|
||||
)
|
||||
from .soundgasm import (
|
||||
SoundgasmIE,
|
||||
@@ -586,6 +596,7 @@ from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import (
|
||||
SportBoxIE,
|
||||
@@ -690,7 +701,7 @@ from .twitch import (
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import TwitterCardIE
|
||||
from .twitter import TwitterCardIE, TwitterIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
@@ -717,7 +728,6 @@ from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
@@ -727,6 +737,7 @@ from .vidme import VidmeIE
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
@@ -779,6 +790,7 @@ from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
@@ -822,6 +834,7 @@ from .youtube import (
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeUserPlaylistsIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
@@ -36,6 +36,18 @@ class ABCIE(InfoExtractor):
|
||||
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
|
||||
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
|
||||
'info_dict': {
|
||||
'id': '6880080',
|
||||
'ext': 'mp3',
|
||||
'title': 'NAB lifts interest rates, following Westpac and CBA',
|
||||
'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -43,7 +55,7 @@ class ABCIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
@@ -60,11 +72,13 @@ class ABCIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
|
||||
'width': int_or_none(url_info.get('width')),
|
||||
'height': int_or_none(url_info.get('height')),
|
||||
'tbr': int_or_none(url_info.get('bitrate')),
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
} for url_info in urls_info]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -183,7 +183,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', 'm3u8_native', preference=0, m3u8_id='hls'))
|
||||
media_url, segment_title, 'mp4', preference=0, m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
|
@@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
|
||||
'uploader': 'Al Jazeera English',
|
||||
},
|
||||
'add_ie': ['Brightcove'],
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
|
||||
@@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
|
||||
'&%40videoPlayer={0}'.format(brightcove_id)
|
||||
),
|
||||
'ie_key': 'Brightcove',
|
||||
'ie_key': 'BrightcoveLegacy',
|
||||
}
|
||||
|
@@ -26,8 +26,8 @@ class AnitubeIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
key = self._html_search_regex(
|
||||
r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
|
||||
key = self._search_regex(
|
||||
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
|
||||
|
||||
config_xml = self._download_xml(
|
||||
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
|
||||
|
@@ -14,8 +14,8 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
parse_xml,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
@@ -161,7 +161,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
|
@@ -7,11 +7,11 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
xpath_text,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -63,7 +63,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'j_password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
response = self._download_webpage(
|
||||
@@ -94,7 +94,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for fmt in ['windows', 'android_tablet']:
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
||||
request.add_header('User-Agent', self._USER_AGENT)
|
||||
|
||||
|
@@ -6,13 +6,13 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ class BambuserIE(InfoExtractor):
|
||||
'pass': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
response = self._download_webpage(
|
||||
@@ -126,7 +126,7 @@ class BambuserChannelIE(InfoExtractor):
|
||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||
'&method=broadcast&format=json&vid_older_than={last}'
|
||||
).format(user=user, count=self._STEP, last=last_id)
|
||||
req = compat_urllib_request.Request(req_url)
|
||||
req = sanitized_Request(req_url)
|
||||
# Without setting this header, we wouldn't get any result
|
||||
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
|
||||
data = self._download_json(
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -14,18 +13,21 @@ from ..utils import (
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf)
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
]
|
||||
@@ -332,7 +334,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation'):
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
continue
|
||||
self._raise_extractor_error(e)
|
||||
@@ -343,8 +345,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
@@ -625,6 +627,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
@@ -651,7 +654,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BBCCoUkIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
||||
return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
|
||||
# Direct links to media in media metadata (e.g.
|
||||
@@ -902,3 +905,33 @@ class BBCIE(BBCCoUkIE):
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
||||
class BBCCoUkArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'bbc.co.uk:article'
|
||||
IE_DESC = 'BBC articles'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
|
||||
'info_dict': {
|
||||
'id': '3jNQLTMrPlYGTBn0WV6M2MS',
|
||||
'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
|
||||
'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'add_ie': ['BBCCoUk'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
entries = [self.url_result(programme_url) for programme_url in re.findall(
|
||||
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
@@ -33,6 +33,8 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
import itertools
|
||||
import json
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
@@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
lq_doc = ET.fromstring(lq_page)
|
||||
lq_doc = compat_etree_fromstring(lq_page)
|
||||
lq_durls = lq_doc.findall('./durl')
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
|
@@ -4,14 +4,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -219,7 +217,7 @@ class BlipTVIE(InfoExtractor):
|
||||
for lang, url in subtitles_urls.items():
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
subtitles[lang] = [{
|
||||
# The extension is 'srt' but it's actually an 'ass' file
|
||||
|
@@ -6,9 +6,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/news/[^/]+/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
@@ -17,7 +17,10 @@ class BloombergIE(InfoExtractor):
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = self._match_id(url)
|
||||
|
@@ -3,15 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
@@ -20,12 +19,18 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveIE(InfoExtractor):
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
|
||||
@@ -119,7 +124,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
@@ -245,7 +250,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||
req = compat_urllib_request.Request(request_url)
|
||||
req = sanitized_Request(request_url)
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
referer = linkBase[0]
|
||||
@@ -346,3 +351,172 @@ class BrightcoveIE(InfoExtractor):
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % info['id'])
|
||||
return info
|
||||
|
||||
|
||||
class BrightcoveNewIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
|
||||
'md5': 'c8100925723840d4b0d243f7025703be',
|
||||
'info_dict': {
|
||||
'id': '4463358922001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Meet the man behind Popcorn Time',
|
||||
'description': 'md5:eac376a4fe366edc70279bfb681aea16',
|
||||
'duration': 165.768,
|
||||
'timestamp': 1441391203,
|
||||
'upload_date': '20150904',
|
||||
'uploader_id': '929656772001',
|
||||
'formats': 'mincount:22',
|
||||
},
|
||||
}, {
|
||||
# with rtmp streams
|
||||
'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
|
||||
'info_dict': {
|
||||
'id': '4279049078001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Titansgrave: Chapter 0',
|
||||
'description': 'Titansgrave: Chapter 0',
|
||||
'duration': 1242.058,
|
||||
'timestamp': 1433556729,
|
||||
'upload_date': '20150606',
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:41',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Reference:
|
||||
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
|
||||
|
||||
entries = []
|
||||
|
||||
# Look for iframe embeds [1]
|
||||
for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url)
|
||||
|
||||
# Look for embed_in_page embeds [2]
|
||||
for video_id, account_id, player_id, embed in re.findall(
|
||||
# According to examples from [3] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
r'''(?sx)
|
||||
<video[^>]+
|
||||
data-video-id=["\'](\d+)["\'][^>]*>.*?
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
|
||||
% (account_id, video_id),
|
||||
headers={'Accept': 'application/json;pk=%s' % policy_key})
|
||||
json_data = self._download_json(req, video_id)
|
||||
|
||||
title = json_data['name']
|
||||
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
source_type = source.get('type')
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL':
|
||||
if not src:
|
||||
continue
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
streaming_src = source.get('streaming_src')
|
||||
stream_name, app_name = source.get('stream_name'), source.get('app_name')
|
||||
if not src and not streaming_src and (not stream_name or not app_name):
|
||||
continue
|
||||
tbr = float_or_none(source.get('avg_bitrate'), 1000)
|
||||
height = int_or_none(source.get('height'))
|
||||
f = {
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': source.get('container'),
|
||||
'vcodec': source.get('codec'),
|
||||
'ext': source.get('container').lower(),
|
||||
}
|
||||
|
||||
def build_format_id(kind):
|
||||
format_id = kind
|
||||
if tbr:
|
||||
format_id += '-%dk' % int(tbr)
|
||||
if height:
|
||||
format_id += '-%dp' % height
|
||||
return format_id
|
||||
|
||||
if src or streaming_src:
|
||||
f.update({
|
||||
'url': src or streaming_src,
|
||||
'format_id': build_format_id('http' if src else 'http-streaming'),
|
||||
'preference': 2 if src else 1,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'url': app_name,
|
||||
'play_path': stream_name,
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = json_data.get('description')
|
||||
thumbnail = json_data.get('thumbnail')
|
||||
timestamp = parse_iso8601(json_data.get('published_at'))
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
tags = json_data.get('tags', [])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': account_id,
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
@@ -4,38 +4,53 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class Canalc2IE(InfoExtractor):
|
||||
IE_NAME = 'canalc2.tv'
|
||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||
'url': 'http://www.canalc2.tv/video/12163',
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique'
|
||||
'ext': 'flv',
|
||||
'title': 'Terrasses du Numérique',
|
||||
'duration': 122,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = re.match(self._VALID_URL, url).group('id')
|
||||
# We need to set the voir field for getting the file name
|
||||
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
file_name = self._search_regex(
|
||||
r"so\.addVariable\('file','(.*?)'\);",
|
||||
webpage, 'file name')
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||
video_url = self._search_regex(
|
||||
r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2',
|
||||
webpage, 'video_url', group='file')
|
||||
formats = [{'url': video_url}]
|
||||
if video_url.startswith('rtmp://'):
|
||||
rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
|
||||
formats[0].update({
|
||||
'url': rtmp.group('url'),
|
||||
'ext': 'flv',
|
||||
'app': rtmp.group('app'),
|
||||
'play_path': rtmp.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'id=["\']video_duree["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,6 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class CBSIE(InfoExtractor):
|
||||
@@ -46,13 +50,19 @@ class CBSIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
request = sanitized_Request(url)
|
||||
# Android UA is served with higher quality (720p) streams (see
|
||||
# https://github.com/rg3/youtube-dl/issues/7490)
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)')
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
real_id = self._search_regex(
|
||||
[r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
|
||||
webpage, 'real video ID')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': 'theplatform:%s' % real_id,
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id,
|
||||
{'force_smil_url': True}),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
}
|
||||
if uri.startswith('rtmp'):
|
||||
play_path = re.sub(
|
||||
r'{slistFilePath}', '',
|
||||
uri.split('<break>')[-1].split('{break}')[-1])
|
||||
fmt.update({
|
||||
'app': 'ondemand?auth=cbs',
|
||||
'play_path': 'mp4:' + uri.split('<break>')[-1],
|
||||
'play_path': 'mp4:' + play_path,
|
||||
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
|
||||
'page_url': 'http://www.cbsnews.com',
|
||||
'ext': 'flv',
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -13,6 +12,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -100,7 +100,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=compat_urllib_parse.urlencode(data))
|
||||
|
||||
@@ -115,7 +115,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
|
57
youtube_dl/extractor/clyp.py
Normal file
57
youtube_dl/extractor/clyp.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://clyp.it/ojz2wfah',
|
||||
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
|
||||
'info_dict': {
|
||||
'id': 'ojz2wfah',
|
||||
'ext': 'mp3',
|
||||
'title': 'Krisson80 - bits wip wip',
|
||||
'description': '#Krisson80BitsWipWip #chiptune\n#wip',
|
||||
'duration': 263.21,
|
||||
'timestamp': 1443515251,
|
||||
'upload_date': '20150929',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id)
|
||||
|
||||
formats = []
|
||||
for secure in ('', 'Secure'):
|
||||
for ext in ('Ogg', 'Mp3'):
|
||||
format_id = '%s%s' % (secure, ext)
|
||||
format_url = metadata.get('%sUrl' % format_id)
|
||||
if format_url:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = metadata['Title']
|
||||
description = metadata.get('Description')
|
||||
duration = float_or_none(metadata.get('Duration'))
|
||||
timestamp = parse_iso8601(metadata.get('DateCreated'))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -4,7 +4,7 @@ from .mtv import MTVIE
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -16,4 +16,7 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ class CollegeRamaIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
|
||||
json.dumps(player_options_request))
|
||||
request.add_header('Content-Type', 'application/json')
|
||||
|
@@ -10,20 +10,18 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_getpass,
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
@@ -38,6 +36,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
@@ -172,6 +171,7 @@ class InfoExtractor(object):
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
repost_count: Number of reposts of the video
|
||||
average_rating: Average rating give by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
@@ -310,11 +310,11 @@ class InfoExtractor(object):
|
||||
@classmethod
|
||||
def ie_key(cls):
|
||||
"""A string for getting the InfoExtractor with get_info_extractor"""
|
||||
return cls.__name__[:-2]
|
||||
return compat_str(cls.__name__[:-2])
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return type(self).__name__[:-2]
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
""" Returns the response handle """
|
||||
@@ -461,7 +461,7 @@ class InfoExtractor(object):
|
||||
return xml_string
|
||||
if transform_source:
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note='Downloading JSON metadata',
|
||||
@@ -645,7 +645,7 @@ class InfoExtractor(object):
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
@@ -841,7 +841,7 @@ class InfoExtractor(object):
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
return False
|
||||
@@ -891,6 +891,11 @@ class InfoExtractor(object):
|
||||
if not media_nodes:
|
||||
manifest_version = '2.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
base_url = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
@@ -898,7 +903,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
manifest_url = (
|
||||
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
||||
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
|
||||
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
|
||||
# If media_url is itself a f4m manifest do the recursive extraction
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
@@ -943,13 +948,15 @@ class InfoExtractor(object):
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
m3u8_doc = self._download_webpage(
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
if m3u8_doc is False:
|
||||
return m3u8_doc
|
||||
if res is False:
|
||||
return res
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
@@ -1278,7 +1285,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
|
@@ -5,12 +5,12 @@ import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
@@ -21,7 +21,9 @@ from ..utils import (
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
@@ -32,9 +34,29 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
||||
data = urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'name': username,
|
||||
'password': password,
|
||||
})
|
||||
login_request = sanitized_Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else compat_urllib_request.Request(url_or_request))
|
||||
else sanitized_Request(url_or_request))
|
||||
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||
# similar to https://github.com/rg3/youtube-dl/issues/6797.
|
||||
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
|
||||
@@ -46,10 +68,22 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
return super(CrunchyrollBaseIE, self)._download_webpage(
|
||||
request, video_id, note, errnote, fatal, tries, timeout, encoding)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
qs = compat_urlparse.parse_qs(parsed_url.query)
|
||||
# Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
|
||||
# > This content may be inappropriate for some people.
|
||||
# > Are you sure you want to continue?
|
||||
# since it's not disabled by default in crunchyroll account's settings.
|
||||
# See https://github.com/rg3/youtube-dl/issues/7202.
|
||||
qs['skip_wall'] = ['1']
|
||||
return compat_urlparse.urlunparse(
|
||||
parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -72,7 +106,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'id': '589804',
|
||||
'ext': 'flv',
|
||||
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
|
||||
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
|
||||
'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Danny Choo Network',
|
||||
'upload_date': '20120213',
|
||||
@@ -81,10 +115,13 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -94,24 +131,6 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'1080': ('80', '108'),
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
||||
data = urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'name': username,
|
||||
'password': password,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
|
||||
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
|
||||
@@ -217,7 +236,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
return output
|
||||
|
||||
def _extract_subtitles(self, subtitle):
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
sub_root = compat_etree_fromstring(subtitle)
|
||||
return [{
|
||||
'ext': 'srt',
|
||||
'data': self._convert_subtitles_to_srt(sub_root),
|
||||
@@ -228,7 +247,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
@@ -254,7 +273,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
else:
|
||||
webpage_url = 'http://www.' + mobj.group('url')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
|
||||
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
|
||||
note_m = self._html_search_regex(
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
@@ -270,11 +289,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||
if not video_description:
|
||||
video_description = None
|
||||
video_description = self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
|
||||
webpage, 'description', default=None)
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
||||
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||
@@ -285,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_uploader', fatal=False)
|
||||
|
||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||
playerdata_req = sanitized_Request(playerdata_url)
|
||||
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
|
||||
@@ -297,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request(
|
||||
streamdata_req = sanitized_Request(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||
% (stream_id, stream_format, stream_quality),
|
||||
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
|
||||
@@ -352,7 +375,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = "crunchyroll:playlist"
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
@@ -361,12 +384,25 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium available
|
||||
'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
|
||||
'info_dict': {
|
||||
'id': 'cosplay-complex-ova',
|
||||
'title': 'Cosplay Complex OVA'
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
# geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
|
||||
'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
|
@@ -7,15 +7,13 @@ import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -25,7 +23,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
@staticmethod
|
||||
def _build_request(url):
|
||||
"""Build a request with the family filter disabled"""
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
@@ -96,6 +94,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader': 'HotWaves1012',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/xhza0o',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -124,6 +127,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
|
||||
self._check_error(metadata)
|
||||
|
||||
formats = []
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for media in media_list:
|
||||
@@ -133,9 +139,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
ext = determine_ext(media_url)
|
||||
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
elif type_ == 'application/f4m' or ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
@@ -201,9 +215,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'video info', flags=re.MULTILINE),
|
||||
video_id)
|
||||
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
self._check_error(info)
|
||||
|
||||
formats = []
|
||||
for (key, format_id) in self._FORMATS:
|
||||
@@ -246,6 +258,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'duration': info['duration']
|
||||
}
|
||||
|
||||
def _check_error(self, info):
|
||||
if info.get('error') is not None:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
sub_list = self._download_webpage(
|
||||
|
@@ -2,13 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,7 +34,7 @@ class DCNIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
|
||||
|
88
youtube_dl/extractor/democracynow.py
Normal file
88
youtube_dl/extractor/democracynow.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class DemocracynowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'
|
||||
IE_NAME = 'democracynow'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.democracynow.org/shows/2015/7/3',
|
||||
'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'July 03, 2015 - Democracy Now!',
|
||||
'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
|
||||
'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
|
||||
'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
json_data = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
|
||||
display_id)
|
||||
video_id = None
|
||||
formats = []
|
||||
|
||||
default_lang = 'en'
|
||||
|
||||
subtitles = {}
|
||||
|
||||
def add_subtitle_item(lang, info_dict):
|
||||
if lang not in subtitles:
|
||||
subtitles[lang] = []
|
||||
subtitles[lang].append(info_dict)
|
||||
|
||||
# chapter_file are not subtitles
|
||||
if 'caption_file' in json_data:
|
||||
add_subtitle_item(default_lang, {
|
||||
'url': compat_urlparse.urljoin(url, json_data['caption_file']),
|
||||
})
|
||||
|
||||
for subtitle_item in json_data.get('captions', []):
|
||||
lang = subtitle_item.get('language', '').lower() or default_lang
|
||||
add_subtitle_item(lang, {
|
||||
'url': compat_urlparse.urljoin(url, subtitle_item['url']),
|
||||
})
|
||||
|
||||
for key in ('file', 'audio', 'video'):
|
||||
media_url = json_data.get(key, '')
|
||||
if not media_url:
|
||||
continue
|
||||
media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
|
||||
video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id or display_id,
|
||||
'title': json_data['title'],
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
51
youtube_dl/extractor/dplay.py
Normal file
51
youtube_dl/extractor/dplay.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'duration': 2650,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id="(\d+)"', webpage, 'video id')
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
|
||||
video_id)['data'][0]
|
||||
|
||||
self._set_cookie(
|
||||
'secure.dplay.se', 'dsc-geo',
|
||||
'{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
|
||||
# TODO: consider adding support for 'stream_type=hds', it seems to
|
||||
# require setting some cookies
|
||||
manifest_url = self._download_json(
|
||||
'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
|
||||
video_id, 'Getting manifest url for hls stream')['hls']
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'formats': formats,
|
||||
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
|
||||
}
|
@@ -7,7 +7,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -16,6 +15,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class DramaFeverBaseIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
@@ -2,14 +2,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import qualities
|
||||
from ..utils import (
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
@@ -26,10 +29,12 @@ class DumpertIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
protocol = mobj.group('protocol')
|
||||
|
||||
url = 'https://www.dumpert.nl/mediabase/' + video_id
|
||||
req = compat_urllib_request.Request(url)
|
||||
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -87,7 +87,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native')
|
||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
|
@@ -1,39 +1,92 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
IE_NAME = 'eitb.tv'
|
||||
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'add_ie': ['Brightcove'],
|
||||
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
|
||||
'md5': 'edf4436247185adee3ea18ce64c47998',
|
||||
'info_dict': {
|
||||
'id': '2743577154001',
|
||||
'id': '4090227752001',
|
||||
'ext': 'mp4',
|
||||
'title': '60 minutos (Lasa y Zabala, 30 años)',
|
||||
# All videos from eitb has this description in the brightcove info
|
||||
'description': '.',
|
||||
'uploader': 'Euskal Telebista',
|
||||
'description': 'Programa de reportajes de actualidad.',
|
||||
'duration': 3996.76,
|
||||
'timestamp': 1381789200,
|
||||
'upload_date': '20131014',
|
||||
'tags': list,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
chapter_id = mobj.group('chapter_id')
|
||||
webpage = self._download_webpage(url, chapter_id)
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is None:
|
||||
raise ExtractorError('Could not extract the Brightcove url')
|
||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||
# it manually
|
||||
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||
return self.url_result(bc_url, BrightcoveIE.ie_key())
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
media = video['web_media'][0]
|
||||
|
||||
formats = []
|
||||
for rendition in media['RENDITIONS']:
|
||||
video_url = rendition.get('PMD_URL')
|
||||
if not video_url:
|
||||
continue
|
||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += '-%d' % int(tbr)
|
||||
formats.append({
|
||||
'url': rendition['PMD_URL'],
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
hls_url = media.get('HLS_SURL')
|
||||
if hls_url:
|
||||
request = sanitized_Request(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
|
||||
headers={'Referer': url})
|
||||
token_data = self._download_json(
|
||||
request, video_id, 'Downloading auth token', fatal=False)
|
||||
if token_data:
|
||||
token = token_data.get('token')
|
||||
if token:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
'%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
hds_url = media.get('HDS_SURL')
|
||||
if hds_url:
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
'%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
|
||||
video_id, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
|
||||
'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
|
||||
'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
|
||||
'duration': float_or_none(media.get('LENGTH'), 1000),
|
||||
'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
|
||||
'tags': media.get('TAGS'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,13 +3,12 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -75,7 +74,7 @@ class EscapistIE(InfoExtractor):
|
||||
video_id = ims_video['videoID']
|
||||
key = ims_video['hash']
|
||||
|
||||
config_req = compat_urllib_request.Request(
|
||||
config_req = sanitized_Request(
|
||||
'http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
|
||||
config_req.add_header('Referer', url)
|
||||
|
@@ -3,11 +3,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,7 +40,7 @@ class EveryonesMixtapeIE(InfoExtractor):
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
|
||||
pllist_req = compat_urllib_request.Request(pllist_url)
|
||||
pllist_req = sanitized_Request(pllist_url)
|
||||
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
||||
playlist_list = self._download_json(
|
||||
@@ -55,7 +53,7 @@ class EveryonesMixtapeIE(InfoExtractor):
|
||||
raise ExtractorError('Playlist id not found')
|
||||
|
||||
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
|
||||
pl_req = compat_urllib_request.Request(pl_url)
|
||||
pl_req = sanitized_Request(pl_url)
|
||||
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
playlist = self._download_json(
|
||||
pl_req, playlist_id, note='Downloading playlist info')
|
||||
|
@@ -3,23 +3,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'info_dict': {
|
||||
'id': '652431',
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'ext': 'mp4',
|
||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||
'uploader': 'unknown',
|
||||
@@ -29,14 +26,18 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/gay/video/abcde-1234',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/video/652431',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
@@ -49,20 +50,36 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = compat_parse_qs(self._search_regex(
|
||||
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
|
||||
for k, vals in flash_vars.items():
|
||||
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
|
||||
if m is not None:
|
||||
formats.append({
|
||||
'format_id': m.group('quality'),
|
||||
'quality': quality(m.group('quality')),
|
||||
'url': vals[0],
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -10,12 +10,11 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
@@ -74,7 +73,7 @@ class FacebookIE(InfoExtractor):
|
||||
if useremail is None:
|
||||
return
|
||||
|
||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req = sanitized_Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
@@ -95,7 +94,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
login_results = self._download_webpage(request, None,
|
||||
@@ -110,7 +109,7 @@ class FacebookIE(InfoExtractor):
|
||||
r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
|
||||
'name_action_selected': 'dont_save',
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
@@ -142,16 +141,20 @@ class FacebookIE(InfoExtractor):
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
video_data = params['video_data'][0]
|
||||
|
||||
formats = []
|
||||
for quality in ['sd', 'hd']:
|
||||
src = video_data.get('%s_src' % quality)
|
||||
if src is not None:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': src,
|
||||
})
|
||||
for format_id, f in params['video_data'].items():
|
||||
if not f or not isinstance(f, list):
|
||||
continue
|
||||
for quality in ('sd', 'hd'):
|
||||
for src_type in ('src', 'src_no_ratelimit'):
|
||||
src = f[0].get('%s_%s' % (quality, src_type))
|
||||
if src:
|
||||
formats.append({
|
||||
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
||||
'url': src,
|
||||
'preference': -10 if format_id == 'progressive' else 0,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
@@ -171,7 +174,5 @@ class FacebookIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('video_duration')),
|
||||
'thumbnail': video_data.get('thumbnail_src'),
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -57,7 +58,7 @@ class FC2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||
|
||||
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
|
||||
@@ -66,7 +67,7 @@ class FC2IE(InfoExtractor):
|
||||
return False
|
||||
|
||||
# this is also needed
|
||||
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||
self._download_webpage(
|
||||
login_redir, None, note='Login redirect', errnote='Login redirect failed')
|
||||
|
||||
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ class FlickrIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
video_uploader_id = mobj.group('uploader_id')
|
||||
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
req = sanitized_Request(webpage_url)
|
||||
req.add_header(
|
||||
'User-Agent',
|
||||
# it needs a more recent version
|
||||
|
@@ -3,12 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -93,7 +91,7 @@ class FourTubeIE(InfoExtractor):
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
b'Origin': b'http://www.4tube.com',
|
||||
}
|
||||
token_req = compat_urllib_request.Request(token_url, b'{}', headers)
|
||||
token_req = sanitized_Request(token_url, b'{}', headers)
|
||||
tokens = self._download_json(token_req, video_id)
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
|
@@ -83,6 +83,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
subtitles = {}
|
||||
subtitles_list = [{
|
||||
'url': subformat['url'],
|
||||
'ext': subformat.get('format'),
|
||||
} for subformat in info.get('subtitles', []) if subformat.get('url')]
|
||||
if subtitles_list:
|
||||
subtitles['fr'] = subtitles_list
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -91,20 +99,27 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'pluzz.francetv.fr'
|
||||
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
||||
_VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
|
||||
|
||||
# Can't use tests, videos expire in 7 days
|
||||
|
||||
def _real_extract(self, url):
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'id_video', webpage, 'video id', default=None)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion=["\'](\d+)', webpage, 'video id')
|
||||
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
|
||||
|
||||
@@ -120,6 +135,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'title': 'Soir 3',
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
'subtitles': {
|
||||
'fr': 'mincount:2',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
|
@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
|
||||
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
|
||||
bitrates.sort()
|
||||
m3u8_url = self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
|
||||
webpage, 'm3u8 url', default=None, group='url')
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -125,7 +123,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(request, display_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
||||
|
@@ -9,8 +9,8 @@ import sys
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
@@ -21,7 +21,7 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
is_html,
|
||||
orderedSet,
|
||||
parse_xml,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -30,7 +30,10 @@ from ..utils import (
|
||||
url_basename,
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
@@ -141,6 +144,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Automatics, robotics and biocybernetics',
|
||||
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||
'upload_date': '20130627',
|
||||
'formats': 'mincount:16',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
@@ -274,7 +278,7 @@ class GenericIE(InfoExtractor):
|
||||
# it also tests brightcove videos that need to set the 'Referer' in the
|
||||
# http requests
|
||||
{
|
||||
'add_ie': ['Brightcove'],
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
||||
'info_dict': {
|
||||
'id': '2765128793001',
|
||||
@@ -298,7 +302,7 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'thestar.com',
|
||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||
},
|
||||
'add_ie': ['Brightcove'],
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||
@@ -313,7 +317,7 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/issues/3541
|
||||
'add_ie': ['Brightcove'],
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '3866516442001',
|
||||
@@ -819,6 +823,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Kaltura embed protected with referrer
|
||||
{
|
||||
'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
|
||||
'info_dict': {
|
||||
'id': '1_g4fbemnq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violetta - Achter De Schermen - Ruggero',
|
||||
'description': 'Achter de schermen met Ruggero',
|
||||
'timestamp': 1435133761,
|
||||
'upload_date': '20150624',
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -1030,6 +1047,31 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'cinemasnob',
|
||||
},
|
||||
},
|
||||
# BrightcoveInPageEmbed embed
|
||||
{
|
||||
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
|
||||
'info_dict': {
|
||||
'id': '4238694884001',
|
||||
'ext': 'flv',
|
||||
'title': 'Tabletop: Dread, Last Thoughts',
|
||||
'description': 'Tabletop: Dread, Last Thoughts',
|
||||
'duration': 51690,
|
||||
},
|
||||
},
|
||||
# JWPlayer with M3U8
|
||||
{
|
||||
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
|
||||
'info_dict': {
|
||||
'id': 'playlist',
|
||||
'ext': 'mp4',
|
||||
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
|
||||
'uploader': 'ren.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1173,7 +1215,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
full_response = None
|
||||
if head_response is False:
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Accept-Encoding', '*')
|
||||
full_response = self._request_webpage(request, video_id)
|
||||
head_response = full_response
|
||||
@@ -1202,7 +1244,7 @@ class GenericIE(InfoExtractor):
|
||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||
|
||||
if not full_response:
|
||||
request = compat_urllib_request.Request(url)
|
||||
request = sanitized_Request(url)
|
||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
|
||||
@@ -1237,7 +1279,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
@@ -1289,14 +1331,14 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||
# Look for Brightcove Legacy Studio embeds
|
||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
self.to_screen('Brightcove video detected.')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||
'ie_key': 'Brightcove'
|
||||
'ie_key': 'BrightcoveLegacy'
|
||||
} for bc_url in bc_urls]
|
||||
|
||||
return {
|
||||
@@ -1306,6 +1348,11 @@ class GenericIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_urls = BrightcoveNewIE._extract_urls(webpage)
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
@@ -1671,10 +1718,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
return self.url_result(smuggle_url(
|
||||
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
|
||||
{'source_url': url}), 'Kaltura')
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
@@ -1719,7 +1768,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for UDN embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
|
||||
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
|
||||
@@ -1839,6 +1888,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||
|
||||
@@ -1850,25 +1900,24 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
entry_info_dict = {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
entries.append(entry_info_dict)
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
@@ -14,79 +14,58 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||
_VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id="(\d+)"',
|
||||
r'\bdata-player-videosids="(\d+)"',
|
||||
r'<div[^>]+\bid="(\d+)"',
|
||||
]
|
||||
|
||||
_RESIGN_EXPIRATION = 86400
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/',
|
||||
'md5': '03ebf41cb7ade43581608b7d9b71fab0',
|
||||
'info_dict': {
|
||||
'id': '3654973',
|
||||
'ext': 'mp4',
|
||||
'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão',
|
||||
'duration': 251.585,
|
||||
'uploader': 'SporTV',
|
||||
'uploader_id': 698,
|
||||
'like_count': int,
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'Globo.com',
|
||||
'uploader_id': '265',
|
||||
},
|
||||
{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'Globo.com',
|
||||
'uploader_id': 265,
|
||||
'like_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://globoplay.globo.com/v/4581987/',
|
||||
'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
|
||||
'info_dict': {
|
||||
'id': '4581987',
|
||||
'ext': 'mp4',
|
||||
'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
|
||||
'duration': 137.973,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
},
|
||||
{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': 196,
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'md5': 'c1defca721ce25b2354e927d3e4b3dec',
|
||||
'info_dict': {
|
||||
'id': '3928201',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
|
||||
'duration': 1472.906,
|
||||
'uploader': 'Canal Brasil',
|
||||
'uploader_id': 705,
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5():
|
||||
class MD5:
|
||||
HEX_FORMAT_LOWERCASE = 0
|
||||
HEX_FORMAT_UPPERCASE = 1
|
||||
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
|
||||
@@ -353,9 +332,6 @@ class GloboIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
|
||||
video = self._download_json(
|
||||
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
||||
|
||||
@@ -364,7 +340,7 @@ class GloboIE(InfoExtractor):
|
||||
formats = []
|
||||
for resource in video['resources']:
|
||||
resource_id = resource.get('_id')
|
||||
if not resource_id:
|
||||
if not resource_id or resource_id.endswith('manifest'):
|
||||
continue
|
||||
|
||||
security = self._download_json(
|
||||
@@ -393,20 +369,23 @@ class GloboIE(InfoExtractor):
|
||||
resource_url = resource['url']
|
||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': signed_url,
|
||||
'format_id': resource_id,
|
||||
'height': resource.get('height'),
|
||||
'format_id': 'http-%s' % resource_id,
|
||||
'height': int_or_none(resource.get('height')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = float_or_none(video.get('duration'), 1000)
|
||||
like_count = int_or_none(video.get('likes'))
|
||||
uploader = video.get('channel')
|
||||
uploader_id = video.get('channel_id')
|
||||
uploader_id = str_or_none(video.get('channel_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -414,6 +393,46 @@ class GloboIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})',
|
||||
r'\bdata-player-videosids=["\'](\d{7,})',
|
||||
r'\bvideosIDs\s*:\s*["\'](\d{7,})',
|
||||
r'\bdata-id=["\'](\d{7,})',
|
||||
r'<div[^>]+\bid=["\'](\d{7,})',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
return self.url_result('globo:%s' % video_id, 'Globo')
|
||||
|
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
} for width, height, video_url in re.findall(
|
||||
r'\d+,(\d+),(\d+),"(https?://redirector\.googlevideo\.com.*?)"', webpage)]
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
urlencode_postdata,
|
||||
urlhandle_detect_ext,
|
||||
@@ -47,7 +45,7 @@ class HearThisAtIE(InfoExtractor):
|
||||
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
|
||||
|
||||
payload = urlencode_postdata({'tracks[]': track_id})
|
||||
req = compat_urllib_request.Request(self._PLAYLIST_URL, payload)
|
||||
req = sanitized_Request(self._PLAYLIST_URL, payload)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
track = self._download_json(req, track_id, 'Downloading playlist')[0]
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -41,7 +39,7 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
('mediaType', 's'),
|
||||
('mediaId', video_id),
|
||||
])
|
||||
r = compat_urllib_request.Request(
|
||||
r = sanitized_Request(
|
||||
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
|
||||
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
mkd = self._download_json(
|
||||
|
@@ -4,12 +4,10 @@ import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,7 +30,7 @@ class HypemIE(InfoExtractor):
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
request = sanitized_Request(complete_url)
|
||||
response, urlh = self._download_webpage_handle(
|
||||
request, track_id, 'Downloading webpage with the url')
|
||||
cookie = urlh.headers.get('Set-Cookie', '')
|
||||
@@ -52,7 +50,7 @@ class HypemIE(InfoExtractor):
|
||||
title = track['song']
|
||||
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
serve_url, '', {'Content-Type': 'application/json'})
|
||||
request.add_header('cookie', cookie)
|
||||
song_data = self._download_json(request, track_id, 'Downloading metadata')
|
||||
|
@@ -4,8 +4,8 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
from ..utils import (
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,24 +30,33 @@ class ImdbIE(InfoExtractor):
|
||||
descr = self._html_search_regex(
|
||||
r'(?s)<span itemprop="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
available_formats = re.findall(
|
||||
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
|
||||
flags=re.MULTILINE)
|
||||
player_url = 'http://www.imdb.com/video/imdb/vi%s/imdb/single' % video_id
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, 'Downloading player page')
|
||||
# the player page contains the info for the default format, we have to
|
||||
# fetch other pages for the rest of the formats
|
||||
extra_formats = re.findall(r'href="(?P<url>%s.*?)".*?>(?P<name>.*?)<' % re.escape(player_url), player_page)
|
||||
format_pages = [
|
||||
self._download_webpage(
|
||||
f_url, video_id, 'Downloading info for %s format' % f_name)
|
||||
for f_url, f_name in extra_formats]
|
||||
format_pages.append(player_page)
|
||||
|
||||
quality = qualities(['SD', '480p', '720p'])
|
||||
formats = []
|
||||
for f_id, f_path in available_formats:
|
||||
f_path = f_path.strip()
|
||||
format_page = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, f_path),
|
||||
'Downloading info for %s format' % f_id)
|
||||
for format_page in format_pages:
|
||||
json_data = self._search_regex(
|
||||
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||
format_page, 'json data', flags=re.DOTALL)
|
||||
info = json.loads(json_data)
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
f_id = format_info['ffname']
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
'quality': quality(f_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
'info_dict': {
|
||||
@@ -21,7 +21,10 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -6,12 +6,10 @@ from random import random
|
||||
from math import floor
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -61,7 +59,7 @@ class IPrimaIE(InfoExtractor):
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = compat_urllib_request.Request(player_url)
|
||||
req = sanitized_Request(player_url)
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -205,9 +205,9 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
# TODO: automatic key extraction
|
||||
# last update at 2015-10-10 for Zombie::bite
|
||||
# '7239670519b6ac209a0bee4ef0446a6b24894b8ac2751506e42116212a0d0272e505'[2:66][1::2]
|
||||
enc_key = '97596c0abee04ab49ba25564161ad225'
|
||||
# last update at 2015-10-22 for Zombie::bite
|
||||
# '7223c67061dbea1259d0ceb44f44b6d62288f4f80c972170de5201d2321060270e05'[2:66][0::2]
|
||||
enc_key = '2c76de15dcb44bd28ff0927d50d31620'
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -5,11 +5,9 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -78,7 +76,7 @@ class IviIE(InfoExtractor):
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
request = sanitized_Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
|
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapisec)\.)?kaltura\.com/
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
@@ -121,31 +127,47 @@ class KalturaIE(InfoExtractor):
|
||||
video_id, actions, note='Downloading video info JSON')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
|
||||
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
|
||||
|
||||
info, source_data = self._get_video_info(entry_id, partner_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f['fileExt'],
|
||||
'tbr': f['bitrate'],
|
||||
'fps': f.get('frameRate'),
|
||||
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
|
||||
'container': f.get('containerFormat'),
|
||||
'vcodec': f.get('videoCodecId'),
|
||||
'height': f.get('height'),
|
||||
'width': f.get('width'),
|
||||
'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
|
||||
} for f in source_data['flavorAssets']]
|
||||
source_url = smuggled_data.get('source_url')
|
||||
if source_url:
|
||||
referrer = base64.b64encode(
|
||||
'://'.join(compat_urlparse.urlparse(source_url)[:2])
|
||||
.encode('utf-8')).decode('utf-8')
|
||||
else:
|
||||
referrer = None
|
||||
|
||||
formats = []
|
||||
for f in source_data['flavorAssets']:
|
||||
video_url = '%s/flavorId/%s' % (info['dataUrl'], f['id'])
|
||||
if referrer:
|
||||
video_url += '?referrer=%s' % referrer
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f.get('fileExt'),
|
||||
'tbr': int_or_none(f['bitrate']),
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
|
||||
'container': f.get('containerFormat'),
|
||||
'vcodec': f.get('videoCodecId'),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'url': video_url,
|
||||
})
|
||||
self._check_formats(formats, entry_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': entry_id,
|
||||
'title': info['name'],
|
||||
'formats': formats,
|
||||
'description': info.get('description'),
|
||||
'description': clean_html(info.get('description')),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
|
@@ -4,10 +4,8 @@ import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class KeezMoviesIE(InfoExtractor):
|
||||
@@ -26,7 +24,7 @@ class KeezMoviesIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -8,14 +8,15 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
encode_data_uri,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,15 +26,16 @@ class LetvIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.letv.com/ptv/vplay/22005890.html',
|
||||
'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
|
||||
'md5': 'edadcfe5406976f42f9f266057ee5e40',
|
||||
'info_dict': {
|
||||
'id': '22005890',
|
||||
'ext': 'mp4',
|
||||
'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
|
||||
'timestamp': 1424747397,
|
||||
'upload_date': '20150224',
|
||||
'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.letv.com/ptv/vplay/1415246.html',
|
||||
'info_dict': {
|
||||
@@ -42,16 +44,22 @@ class LetvIE(InfoExtractor):
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'This video is available only in Mainland China, thus a proxy is needed',
|
||||
'url': 'http://www.letv.com/ptv/vplay/1118082.html',
|
||||
'md5': 'f80936fbe20fb2f58648e81386ff7927',
|
||||
'md5': '2424c74948a62e5f31988438979c5ad1',
|
||||
'info_dict': {
|
||||
'id': '1118082',
|
||||
'ext': 'mp4',
|
||||
'title': '与龙共舞 完整版',
|
||||
'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
'skip': 'Only available in China',
|
||||
}]
|
||||
|
||||
@@ -74,6 +82,27 @@ class LetvIE(InfoExtractor):
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
def decrypt_m3u8(encrypted_data):
|
||||
if encrypted_data[:5].decode('utf-8').lower() != 'vc_01':
|
||||
return encrypted_data
|
||||
encrypted_data = encrypted_data[5:]
|
||||
|
||||
_loc4_ = bytearray()
|
||||
while encrypted_data:
|
||||
b = compat_ord(encrypted_data[0])
|
||||
_loc4_.extend([b // 16, b & 0x0f])
|
||||
encrypted_data = encrypted_data[1:]
|
||||
idx = len(_loc4_) - 11
|
||||
_loc4_ = _loc4_[idx:] + _loc4_[:idx]
|
||||
_loc7_ = bytearray()
|
||||
while _loc4_:
|
||||
_loc7_.append(_loc4_[0] * 16 + _loc4_[1])
|
||||
_loc4_ = _loc4_[2:]
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
@@ -85,7 +114,7 @@ class LetvIE(InfoExtractor):
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.letv.com'
|
||||
}
|
||||
play_json_req = compat_urllib_request.Request(
|
||||
play_json_req = sanitized_Request(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
@@ -115,23 +144,28 @@ class LetvIE(InfoExtractor):
|
||||
for format_id in formats:
|
||||
if format_id in dispatch:
|
||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
||||
|
||||
# Mimic what flvxz.com do
|
||||
url_parts = list(compat_urlparse.urlparse(media_url))
|
||||
qs = dict(compat_urlparse.parse_qs(url_parts[4]))
|
||||
qs.update({
|
||||
'platid': '14',
|
||||
'splatid': '1401',
|
||||
'tss': 'no',
|
||||
'retry': 1
|
||||
media_url += '&' + compat_urllib_parse.urlencode({
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
})
|
||||
url_parts[4] = compat_urllib_parse.urlencode(qs)
|
||||
media_url = compat_urlparse.urlunparse(url_parts)
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
|
||||
m3u8_data = self.decrypt_m3u8(req.read())
|
||||
|
||||
url_info_dict = {
|
||||
'url': media_url,
|
||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
'ext': determine_ext(dispatch[format_id][1]),
|
||||
'format_id': format_id,
|
||||
'protocol': 'm3u8',
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
|
@@ -7,12 +7,12 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
@@ -35,7 +35,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
@@ -64,7 +64,7 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'remember': 'false',
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None,
|
||||
@@ -82,6 +82,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _logout(self):
|
||||
username, _ = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
self._download_webpage(
|
||||
'http://www.lynda.com/ajax/logout.aspx', None,
|
||||
'Logging out', 'Unable to log out', fatal=False)
|
||||
|
||||
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
@@ -108,50 +117,47 @@ class LyndaIE(LyndaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page = self._download_webpage(
|
||||
video = self._download_json(
|
||||
'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json:
|
||||
if 'Status' in video:
|
||||
raise ExtractorError(
|
||||
'lynda returned error: %s' % video_json['Message'], expected=True)
|
||||
'lynda returned error: %s' % video['Message'], expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
if video.get('HasAccess') is False:
|
||||
self.raise_login_required('Video %s is only available for members' % video_id)
|
||||
|
||||
video_id = compat_str(video_json['ID'])
|
||||
duration = video_json['DurationInSeconds']
|
||||
title = video_json['Title']
|
||||
video_id = compat_str(video.get('ID') or video_id)
|
||||
duration = int_or_none(video.get('DurationInSeconds'))
|
||||
title = video['Title']
|
||||
|
||||
formats = []
|
||||
|
||||
fmts = video_json.get('Formats')
|
||||
fmts = video.get('Formats')
|
||||
if fmts:
|
||||
formats.extend([
|
||||
{
|
||||
'url': fmt['Url'],
|
||||
'ext': fmt['Extension'],
|
||||
'width': fmt['Width'],
|
||||
'height': fmt['Height'],
|
||||
'filesize': fmt['FileSize'],
|
||||
'format_id': str(fmt['Resolution'])
|
||||
} for fmt in fmts])
|
||||
formats.extend([{
|
||||
'url': f['Url'],
|
||||
'ext': f.get('Extension'),
|
||||
'width': int_or_none(f.get('Width')),
|
||||
'height': int_or_none(f.get('Height')),
|
||||
'filesize': int_or_none(f.get('FileSize')),
|
||||
'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
|
||||
} for f in fmts if f.get('Url')])
|
||||
|
||||
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||
prioritized_streams = video.get('PrioritizedStreams')
|
||||
if prioritized_streams:
|
||||
formats.extend([
|
||||
{
|
||||
for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
|
||||
formats.extend([{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': format_id,
|
||||
} for format_id, video_url in prioritized_streams['0'].items()
|
||||
])
|
||||
'format_id': '%s-%s' % (prioritized_stream_id, format_id),
|
||||
} for format_id, video_url in prioritized_stream.items()])
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, page)
|
||||
subtitles = self.extract_subtitles(video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -182,7 +188,7 @@ class LyndaIE(LyndaBaseIE):
|
||||
if srt:
|
||||
return srt
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
if subs:
|
||||
@@ -204,12 +210,13 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
course_path = mobj.group('coursepath')
|
||||
course_id = mobj.group('courseid')
|
||||
|
||||
page = self._download_webpage(
|
||||
course = self._download_json(
|
||||
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
course_json = json.loads(page)
|
||||
|
||||
if 'Status' in course_json and course_json['Status'] == 'NotFound':
|
||||
self._logout()
|
||||
|
||||
if course.get('Status') == 'NotFound':
|
||||
raise ExtractorError(
|
||||
'Course %s does not exist' % course_id, expected=True)
|
||||
|
||||
@@ -219,12 +226,13 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||
# by single video API anymore
|
||||
|
||||
for chapter in course_json['Chapters']:
|
||||
for video in chapter['Videos']:
|
||||
if video['HasAccess'] is False:
|
||||
for chapter in course['Chapters']:
|
||||
for video in chapter.get('Videos', []):
|
||||
if video.get('HasAccess') is False:
|
||||
unaccessible_videos += 1
|
||||
continue
|
||||
videos.append(video['ID'])
|
||||
if video.get('ID'):
|
||||
videos.append(video['ID'])
|
||||
|
||||
if unaccessible_videos > 0:
|
||||
self._downloader.report_warning(
|
||||
@@ -237,6 +245,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
'Lynda')
|
||||
for video_id in videos]
|
||||
|
||||
course_title = course_json['Title']
|
||||
course_title = course.get('Title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
|
@@ -1,64 +1,169 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
# No tests, MDR regularily deletes its videos
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# MDR regularily deletes its videos
|
||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
|
||||
'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
|
||||
'info_dict': {
|
||||
'id': '1312272',
|
||||
'ext': 'mp3',
|
||||
'title': 'Feuilleton vom 30. Oktober 2015',
|
||||
'duration': 250,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1419047100,
|
||||
'upload_date': '20141220',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('video_id')
|
||||
domain = m.group('domain')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# determine title and media streams from webpage
|
||||
html = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
|
||||
xmlurl = self._search_regex(
|
||||
r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
|
||||
data_url = self._search_regex(
|
||||
r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', group='url')
|
||||
|
||||
doc = self._download_xml(
|
||||
compat_urlparse.urljoin(url, data_url), video_id)
|
||||
|
||||
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
||||
|
||||
doc = self._download_xml(domain + xmlurl, video_id)
|
||||
formats = []
|
||||
for a in doc.findall('./assets/asset'):
|
||||
url_el = a.find('./progressiveDownloadUrl')
|
||||
if url_el is None:
|
||||
continue
|
||||
abr = int(a.find('bitrateAudio').text) // 1000
|
||||
media_type = a.find('mediaType').text
|
||||
format = {
|
||||
'abr': abr,
|
||||
'filesize': int(a.find('fileSize').text),
|
||||
'url': url_el.text,
|
||||
}
|
||||
processed_urls = []
|
||||
for asset in doc.findall('./assets/asset'):
|
||||
for source in (
|
||||
'progressiveDownload',
|
||||
'dynamicHttpStreamingRedirector',
|
||||
'adaptiveHttpStreamingRedirector'):
|
||||
url_el = asset.find('./%sUrl' % source)
|
||||
if url_el is None:
|
||||
continue
|
||||
|
||||
video_url = url_el.text
|
||||
if video_url in processed_urls:
|
||||
continue
|
||||
|
||||
processed_urls.append(video_url)
|
||||
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
|
||||
ext = determine_ext(url_el.text)
|
||||
if ext == 'm3u8':
|
||||
url_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=0, m3u8_id='HLS', fatal=False)
|
||||
elif ext == 'f4m':
|
||||
url_formats = self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
preference=0, f4m_id='HDS', fatal=False)
|
||||
else:
|
||||
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
||||
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%s-%d' % (media_type, vbr or abr),
|
||||
'filesize': filesize,
|
||||
'abr': abr,
|
||||
'preference': 1,
|
||||
}
|
||||
|
||||
if vbr:
|
||||
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
|
||||
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
url_formats = [f]
|
||||
|
||||
if not url_formats:
|
||||
continue
|
||||
|
||||
if not vbr:
|
||||
for f in url_formats:
|
||||
abr = f.get('tbr') or abr
|
||||
if 'tbr' in f:
|
||||
del f['tbr']
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
formats.extend(url_formats)
|
||||
|
||||
vbr_el = a.find('bitrateVideo')
|
||||
if vbr_el is None:
|
||||
format.update({
|
||||
'vcodec': 'none',
|
||||
'format_id': '%s-%d' % (media_type, abr),
|
||||
})
|
||||
else:
|
||||
vbr = int(vbr_el.text) // 1000
|
||||
format.update({
|
||||
'vbr': vbr,
|
||||
'width': int(a.find('frameWidth').text),
|
||||
'height': int(a.find('frameHeight').text),
|
||||
'format_id': '%s-%d' % (media_type, vbr),
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
|
||||
timestamp = parse_iso8601(
|
||||
xpath_text(
|
||||
doc, [
|
||||
'./broadcast/broadcastDate',
|
||||
'./broadcast/broadcastStartDate',
|
||||
'./broadcast/broadcastEndDate'],
|
||||
'timestamp', default=None))
|
||||
duration = parse_duration(xpath_text(doc, './duration', 'duration'))
|
||||
uploader = xpath_text(doc, './rights', 'uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -7,12 +7,12 @@ from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}
|
||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(request, None, False, 'Unable to confirm age')
|
||||
@@ -142,7 +142,7 @@ class MetacafeIE(InfoExtractor):
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
|
@@ -2,14 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +37,7 @@ class MinhatecaIE(InfoExtractor):
|
||||
('fileId', video_id),
|
||||
('__RequestVerificationToken', token),
|
||||
]
|
||||
req = compat_urllib_request.Request(
|
||||
req = sanitized_Request(
|
||||
'http://minhateca.com.br/action/License/Download',
|
||||
data=compat_urllib_parse.urlencode(token_data))
|
||||
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,6 +52,8 @@ class MioMioIE(InfoExtractor):
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
|
||||
xml_config = self._search_regex(
|
||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||
webpage, 'xml config')
|
||||
@@ -60,14 +63,12 @@ class MioMioIE(InfoExtractor):
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||
video_id)
|
||||
|
||||
# the following xml contains the actual configuration information on the video file(s)
|
||||
vid_config = self._download_xml(
|
||||
vid_config_request = sanitized_Request(
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||
video_id)
|
||||
headers=http_headers)
|
||||
|
||||
http_headers = {
|
||||
'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
|
||||
}
|
||||
# the following xml contains the actual configuration information on the video file(s)
|
||||
vid_config = self._download_xml(vid_config_request, video_id)
|
||||
|
||||
if not int_or_none(xpath_text(vid_config, 'timelength')):
|
||||
raise ExtractorError('Unable to load videos!', expected=True)
|
||||
|
@@ -86,7 +86,7 @@ class MITIE(TechTVMITIE):
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe .*?src="(.+?)"', webpage, 'embed url')
|
||||
return self.url_result(embed_url, ie='TechTVMIT')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
get_element_by_attribute,
|
||||
@@ -15,7 +18,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a',
|
||||
'md5': '0ff1a13aebb35d9bc14081ff633dd324',
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
@@ -34,6 +37,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||
config_url = compat_urlparse.urljoin(url, config_url)
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id, 'Downloading config JSON')
|
||||
@@ -56,7 +60,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')),
|
||||
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))),
|
||||
display_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
|
@@ -5,13 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -80,7 +78,7 @@ class MoeVideoIE(InfoExtractor):
|
||||
]
|
||||
r_json = json.dumps(r)
|
||||
post = compat_urllib_parse.urlencode({'r': r_json})
|
||||
req = compat_urllib_request.Request(self._API_URL, post)
|
||||
req = sanitized_Request(self._API_URL, post)
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
||||
response = self._download_json(req, video_id)
|
||||
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class MofosexIE(InfoExtractor):
|
||||
@@ -29,7 +29,7 @@ class MofosexIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
|
@@ -5,19 +5,17 @@ import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class MonikerIE(InfoExtractor):
|
||||
IE_DESC = 'allmyvideos.net and vidspot.net'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://allmyvideos.net/jih3nce3x6wn',
|
||||
@@ -46,6 +44,18 @@ class MonikerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidspot.net/2/v-ywDf99',
|
||||
'md5': '5f8254ce12df30479428b0152fb8e7ba',
|
||||
'info_dict': {
|
||||
'id': 'ywDf99',
|
||||
'ext': 'mp4',
|
||||
'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
|
||||
'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://allmyvideos.net/v/v-HXZm5t',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -64,18 +74,30 @@ class MonikerIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
builtin_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
|
||||
orig_webpage, 'builtin URL', default=None, group='url')
|
||||
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
if builtin_url:
|
||||
req = sanitized_Request(builtin_url)
|
||||
req.add_header('Referer', url)
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
|
||||
title = self._og_search_title(orig_webpage).strip()
|
||||
description = self._og_search_description(orig_webpage).strip()
|
||||
else:
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = sanitized_Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
description = None
|
||||
|
||||
# Could be several links with different quality
|
||||
links = re.findall(r'"file" : "?(.+?)",', webpage)
|
||||
@@ -89,5 +111,6 @@ class MonikerIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,12 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -59,7 +57,7 @@ class MooshareIE(InfoExtractor):
|
||||
'hash': hash_key,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
|
@@ -1,80 +1,40 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
|
||||
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
|
||||
'info_dict': {
|
||||
'id': 'Wy7ZU',
|
||||
'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
|
||||
'id': 'pKIGmG83AqD9',
|
||||
'display_id': 'warcraft-trailer-1-561180739597',
|
||||
'ext': 'mp4',
|
||||
'title': 'My Week with Marilyn - Do You Love Me?',
|
||||
'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
|
||||
'title': 'Warcraft Trailer 1',
|
||||
'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
show_id = display_id or video_id
|
||||
display_id = self._match_id(url)
|
||||
|
||||
config = self._download_xml(
|
||||
'http://config.movieclips.com/player/config/%s' % video_id,
|
||||
show_id, 'Downloading player config')
|
||||
|
||||
if config.find('./country-region').text == 'false':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)
|
||||
|
||||
properties = config.find('./video/properties')
|
||||
smil_file = properties.attrib['smil_file']
|
||||
|
||||
smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
|
||||
base_url = smil.find('./head/meta').attrib['base']
|
||||
|
||||
formats = []
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
vbr = int(video.attrib['system-bitrate']) / 1000
|
||||
src = video.attrib['src']
|
||||
formats.append({
|
||||
'url': base_url,
|
||||
'play_path': src,
|
||||
'ext': src.split(':')[0],
|
||||
'vbr': vbr,
|
||||
'format_id': '%dk' % vbr,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
|
||||
description = clean_html(compat_str(properties.attrib['clip_description']))
|
||||
thumbnail = properties.attrib['image']
|
||||
categories = properties.attrib['clip_categories'].split(',')
|
||||
req = sanitized_Request(url)
|
||||
# it doesn't work if it thinks the browser it's too old
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
|
||||
title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'_type': 'url_transparent',
|
||||
'url': theplatform_link,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -13,6 +12,7 @@ from ..utils import (
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_mobile_video_formats(self, mtvn_id):
|
||||
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
req = sanitized_Request(webpage_url)
|
||||
# Otherwise we get a webpage that would execute some javascript
|
||||
req.add_header('User-Agent', 'curl/7')
|
||||
webpage = self._download_webpage(req, mtvn_id,
|
||||
|
@@ -11,10 +11,10 @@ from ..compat import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ class MyVideoIE(InfoExtractor):
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
|
@@ -14,7 +14,8 @@ from ..utils import (
|
||||
|
||||
class NDRBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = next(group for group in mobj.groups() if group)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._extract_embed(webpage, display_id)
|
||||
|
||||
@@ -22,7 +23,7 @@ class NDRBaseIE(InfoExtractor):
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
@@ -77,6 +78,9 @@ class NDRIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
@@ -101,7 +105,7 @@ class NDRIE(NDRBaseIE):
|
||||
class NJoyIE(NDRBaseIE):
|
||||
IE_NAME = 'njoy'
|
||||
IE_DESC = 'N-JOY'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
||||
@@ -136,6 +140,9 @@ class NJoyIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
@@ -231,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
|
||||
class NDREmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'ndr:embed'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||
@@ -325,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
|
||||
class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'njoy:embed'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo
|
||||
'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
|
||||
|
@@ -8,11 +8,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
|
||||
@@ -40,7 +40,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
if not details:
|
||||
continue
|
||||
formats.append({
|
||||
'url': 'http://m1.music.126.net/%s/%s.%s' %
|
||||
'url': 'http://m5.music.126.net/%s/%s.%s' %
|
||||
(cls._encrypt(details['dfsId']), details['dfsId'],
|
||||
details['extension']),
|
||||
'ext': details.get('extension'),
|
||||
@@ -56,7 +56,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
return int(round(ms / 1000.0))
|
||||
|
||||
def query_api(self, endpoint, video_id, note):
|
||||
req = compat_urllib_request.Request('%s%s' % (self._API_BASE, endpoint))
|
||||
req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
|
||||
req.add_header('Referer', self._API_BASE)
|
||||
return self._download_json(req, video_id, note)
|
||||
|
||||
|
@@ -1,10 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
@@ -40,8 +38,9 @@ class NFBIE(InfoExtractor):
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request = sanitized_Request(
|
||||
'https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
|
||||
|
@@ -8,7 +8,6 @@ import datetime
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
xpath_text,
|
||||
determine_ext,
|
||||
)
|
||||
@@ -102,7 +102,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8')
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||
login_results = self._download_webpage(
|
||||
request, None, note='Logging in', errnote='Unable to log in')
|
||||
@@ -145,7 +145,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'k': thumb_play_key,
|
||||
'v': video_id
|
||||
})
|
||||
flv_info_request = compat_urllib_request.Request(
|
||||
flv_info_request = sanitized_Request(
|
||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
flv_info_webpage = self._download_webpage(
|
||||
|
@@ -9,7 +9,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ class NocoIE(InfoExtractor):
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
|
||||
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
@@ -4,11 +4,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -41,7 +39,7 @@ class NosVideoIE(InfoExtractor):
|
||||
'op': 'download1',
|
||||
'method_free': 'Continue to Video',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
req = sanitized_Request(url, urlencode_postdata(fields))
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
webpage = self._download_webpage(req, video_id,
|
||||
'Downloading download page')
|
||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
encode_dict,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -38,19 +40,40 @@ class NovaMovIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page = self._download_webpage(
|
||||
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
|
||||
url = 'http://%s/video/%s' % (self._HOST, video_id)
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, page) is not None:
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page')
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
|
||||
def extract_filekey(default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
|
||||
filekey = extract_filekey(default=None)
|
||||
|
||||
if not filekey:
|
||||
fields = self._hidden_inputs(webpage)
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
|
||||
'post url', default=url, group='url')
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(url, post_url)
|
||||
request = sanitized_Request(
|
||||
post_url, urlencode_postdata(encode_dict(fields)))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('Referer', post_url)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading continue to the video page')
|
||||
|
||||
filekey = extract_filekey()
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
|
||||
|
||||
api_response = self._download_webpage(
|
||||
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
|
||||
|
@@ -1,12 +1,12 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveIE
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
|
||||
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
|
||||
note='Downloading player JavaScript',
|
||||
errnote='Unable to download player JavaScript')
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
|
||||
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
|
||||
if bc_url is None:
|
||||
raise ExtractorError('Could not find player definition')
|
||||
return self.url_result(bc_url, 'Brightcove')
|
||||
return self.url_result(bc_url, 'BrightcoveLegacy')
|
||||
elif source == 'vimeo':
|
||||
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
|
||||
elif source == 'youtube':
|
||||
@@ -37,7 +37,7 @@ class NownessBaseIE(InfoExtractor):
|
||||
|
||||
def _api_request(self, url, request_path):
|
||||
display_id = self._match_id(url)
|
||||
request = compat_urllib_request.Request(
|
||||
request = sanitized_Request(
|
||||
'http://api.nowness.com/api/' + request_path % display_id,
|
||||
headers={
|
||||
'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -13,8 +15,63 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class NowTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||
class NowTVBaseIE(InfoExtractor):
|
||||
_VIDEO_FIELDS = (
|
||||
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
|
||||
'broadcastStartDate', 'seoUrl', 'duration', 'files',
|
||||
'format.defaultImage169Format', 'format.defaultImage169Logo')
|
||||
|
||||
def _extract_video(self, info, display_id=None):
|
||||
video_id = compat_str(info['id'])
|
||||
|
||||
files = info['files']
|
||||
if not files:
|
||||
if info.get('geoblocked', False):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available from your location due to geo restriction' % video_id,
|
||||
expected=True)
|
||||
if not info.get('free', True):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available for free' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for item in files['items']:
|
||||
if determine_ext(item['path']) != 'f4v':
|
||||
continue
|
||||
app, play_path = remove_start(item['path'], '/').split('/', 1)
|
||||
formats.append({
|
||||
'url': 'rtmpe://fms.rtl.de',
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s' % play_path,
|
||||
'ext': 'flv',
|
||||
'page_url': 'http://rtlnow.rtl.de',
|
||||
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
|
||||
'tbr': int_or_none(item.get('bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info['title']
|
||||
description = info.get('articleLong') or info.get('articleShort')
|
||||
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
|
||||
duration = parse_duration(info.get('duration'))
|
||||
|
||||
f = info.get('format', {})
|
||||
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id or info.get('seoUrl'),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NowTVIE(NowTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:list/[^/]+/)?(?P<id>[^/]+)/(?:player|preview)'
|
||||
|
||||
_TESTS = [{
|
||||
# rtl
|
||||
@@ -23,7 +80,7 @@ class NowTVIE(InfoExtractor):
|
||||
'id': '203519',
|
||||
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||
'ext': 'flv',
|
||||
'title': 'Die neuen Bauern und eine Hochzeit',
|
||||
'title': 'Inka Bause stellt die neuen Bauern vor',
|
||||
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1432580700,
|
||||
@@ -136,58 +193,65 @@ class NowTVIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id_split = display_id.split('/')
|
||||
if len(display_id) > 2:
|
||||
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = '%s/%s' % (mobj.group('show_id'), mobj.group('id'))
|
||||
|
||||
info = self._download_json(
|
||||
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
||||
display_id)
|
||||
'https://api.nowtv.de/v3/movies/%s?fields=%s'
|
||||
% (display_id, ','.join(self._VIDEO_FIELDS)), display_id)
|
||||
|
||||
video_id = compat_str(info['id'])
|
||||
return self._extract_video(info, display_id)
|
||||
|
||||
files = info['files']
|
||||
if not files:
|
||||
if info.get('geoblocked', False):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available from your location due to geo restriction' % video_id,
|
||||
expected=True)
|
||||
if not info.get('free', True):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available for free' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for item in files['items']:
|
||||
if determine_ext(item['path']) != 'f4v':
|
||||
continue
|
||||
app, play_path = remove_start(item['path'], '/').split('/', 1)
|
||||
formats.append({
|
||||
'url': 'rtmpe://fms.rtl.de',
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s' % play_path,
|
||||
'ext': 'flv',
|
||||
'page_url': 'http://rtlnow.rtl.de',
|
||||
'player_url': 'http://cdn.static-fra.de/now/vodplayer.swf',
|
||||
'tbr': int_or_none(item.get('bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
class NowTVListIE(NowTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/list/(?P<id>[^?/#&]+)$'
|
||||
|
||||
title = info['title']
|
||||
description = info.get('articleLong') or info.get('articleShort')
|
||||
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
|
||||
duration = parse_duration(info.get('duration'))
|
||||
_SHOW_FIELDS = ('title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
|
||||
f = info.get('format', {})
|
||||
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nowtv.at/rtl/stern-tv/list/aktuell',
|
||||
'info_dict': {
|
||||
'id': '17006',
|
||||
'title': 'stern TV - Aktuell',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
'url': 'http://www.nowtv.at/rtl/das-supertalent/list/free-staffel-8',
|
||||
'info_dict': {
|
||||
'id': '20716',
|
||||
'title': 'Das Supertalent - FREE Staffel 8',
|
||||
},
|
||||
'playlist_count': 14,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
show_id = mobj.group('show_id')
|
||||
season_id = mobj.group('id')
|
||||
|
||||
fields = []
|
||||
fields.extend(self._SHOW_FIELDS)
|
||||
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
|
||||
fields.extend(
|
||||
'formatTabs.formatTabPages.container.movies.%s' % field
|
||||
for field in self._VIDEO_FIELDS)
|
||||
|
||||
list_info = self._download_json(
|
||||
'https://api.nowtv.de/v3/formats/seo?fields=%s&name=%s.php'
|
||||
% (','.join(fields), show_id),
|
||||
season_id)
|
||||
|
||||
season = next(
|
||||
season for season in list_info['formatTabs']['items']
|
||||
if season.get('seoheadline') == season_id)
|
||||
|
||||
title = '%s - %s' % (list_info['title'], season['headline'])
|
||||
|
||||
entries = []
|
||||
for container in season['formatTabPages']['items']:
|
||||
for info in ((container.get('container') or {}).get('movies') or {}).get('items') or []:
|
||||
entries.append(self._extract_video(info))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(season.get('id') or season_id), title)
|
||||
|
@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
|
||||
IE_NAME = 'nowvideo'
|
||||
IE_DESC = 'NowVideo'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
|
||||
|
||||
_HOST = 'www.nowvideo.ch'
|
||||
_HOST = 'www.nowvideo.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user