Compare commits
39 Commits
2017.05.01
...
2017.05.07
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4ac0f573ef | ||
|
|
3892a9f4ab | ||
|
|
3995d37da5 | ||
|
|
e4a75d7932 | ||
|
|
e00eb564e9 | ||
|
|
10c87c151b | ||
|
|
228cd9bb90 | ||
|
|
566fbbaefd | ||
|
|
74c09c852a | ||
|
|
fd178b8748 | ||
|
|
a57a8e9918 | ||
|
|
1f9fefe7f5 | ||
|
|
8b4774dcac | ||
|
|
a99cc4ca16 | ||
|
|
9cafc3fd8b | ||
|
|
329e3dd5ad | ||
|
|
1d9e0a4f40 | ||
|
|
7ad53cb7ff | ||
|
|
b2ad479d17 | ||
|
|
4ac6dc3732 | ||
|
|
cc7bda4fff | ||
|
|
50ad078b7b | ||
|
|
4947f13cd0 | ||
|
|
7f09e523e8 | ||
|
|
4fe14732a2 | ||
|
|
ff6f9a6704 | ||
|
|
0c26548601 | ||
|
|
5401bea27f | ||
|
|
7a6d33a9a5 | ||
|
|
fa2a36d9bc | ||
|
|
55949fede6 | ||
|
|
7fc875195f | ||
|
|
c6fe5a7e12 | ||
|
|
ae21d2fd94 | ||
|
|
77481f1386 | ||
|
|
d86d169dd5 | ||
|
|
b9f9f361fa | ||
|
|
ab39a25c75 | ||
|
|
a146fa1c68 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.01**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.07**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.05.01
|
||||
[debug] youtube-dl version 2017.05.07
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
31
ChangeLog
31
ChangeLog
@@ -1,3 +1,34 @@
|
||||
version 2017.05.07
|
||||
|
||||
Common
|
||||
* [extractor/common] Fix typo in _extract_akamai_formats
|
||||
+ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
|
||||
+ [extractor/common] Introduce chapters meta field
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
|
||||
#13003)
|
||||
* [bilibili] Fix video downloading (#13001)
|
||||
* [rmcdecouverte] Fix extraction (#12937)
|
||||
* [theplatform] Extract chapters
|
||||
* [bandcamp] Fix thumbnail extraction (#12980)
|
||||
* [pornhub] Extend URL regular expression (#12996)
|
||||
+ [youtube] Extract chapters
|
||||
+ [nrk] Extract chapters
|
||||
+ [vice] Add support for ooyala embeds in article pages
|
||||
+ [vice] Support vice articles (#12968)
|
||||
* [vice] Fix extraction for non en_us videos (#12967)
|
||||
* [gdcvault] Fix extraction for some videos (#12733)
|
||||
* [pbs] Improve multipart video support (#12981)
|
||||
* [laola1tv] Fix extraction (#12880)
|
||||
+ [cda] Support birthday verification (#12789)
|
||||
* [leeco] Fix extraction (#12974)
|
||||
+ [pbs] Extract chapters
|
||||
* [amp] Improve thumbnail and subtitles extraction
|
||||
* [foxsports] Fix extraction (#12945)
|
||||
- [coub] Remove comment count extraction (#12941)
|
||||
|
||||
|
||||
version 2017.05.01
|
||||
|
||||
Core
|
||||
|
||||
@@ -879,9 +879,10 @@
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
|
||||
@@ -44,6 +44,7 @@ from youtube_dl.utils import (
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
multipart_encode,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase):
|
||||
'http://example.com/path', {'test': '第二行тест'})),
|
||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||
|
||||
def test_multipart_encode(self):
|
||||
self.assertEqual(
|
||||
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
|
||||
self.assertEqual(
|
||||
multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
|
||||
self.assertRaises(
|
||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
|
||||
268
test/test_youtube_chapters.py
Normal file
268
test/test_youtube_chapters.py
Normal file
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import expect_value
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
|
||||
class TestYoutubeChapters(unittest.TestCase):
|
||||
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://www.youtube.com/watch?v=A22oy8dFjqc
|
||||
# pattern: 00:00 - <title>
|
||||
'''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). 
Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
|
||||
1477,
|
||||
[{
|
||||
'start_time': 36,
|
||||
'end_time': 162,
|
||||
'title': 'Bohemian Rhapsody',
|
||||
}, {
|
||||
'start_time': 162,
|
||||
'end_time': 413,
|
||||
'title': 'Radio Ga Ga',
|
||||
}, {
|
||||
'start_time': 413,
|
||||
'end_time': 454,
|
||||
'title': 'Ay Oh!',
|
||||
}, {
|
||||
'start_time': 454,
|
||||
'end_time': 728,
|
||||
'title': 'Hammer To Fall',
|
||||
}, {
|
||||
'start_time': 728,
|
||||
'end_time': 963,
|
||||
'title': 'Crazy Little Thing Called Love',
|
||||
}, {
|
||||
'start_time': 963,
|
||||
'end_time': 1038,
|
||||
'title': 'We Will Rock You',
|
||||
}, {
|
||||
'start_time': 1038,
|
||||
'end_time': 1272,
|
||||
'title': 'We Are The Champions',
|
||||
}, {
|
||||
'start_time': 1272,
|
||||
'end_time': 1477,
|
||||
'title': 'Is This The World We Created...?',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=ekYlRhALiRQ
|
||||
# pattern: <num>. <title> 0:00
|
||||
'1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
|
||||
4009,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 707,
|
||||
'title': '1. Those Beaten Paths of Confusion',
|
||||
}, {
|
||||
'start_time': 707,
|
||||
'end_time': 1590,
|
||||
'title': '2. Beyond the Shadows of Emptiness & Nothingness',
|
||||
}, {
|
||||
'start_time': 1590,
|
||||
'end_time': 2157,
|
||||
'title': '3. Poison Yourself...With Thought',
|
||||
}, {
|
||||
'start_time': 2157,
|
||||
'end_time': 2672,
|
||||
'title': '4. The Agents of Transformation',
|
||||
}, {
|
||||
'start_time': 2672,
|
||||
'end_time': 3187,
|
||||
'title': '5. Drowning in the Pain of Consciousness',
|
||||
}, {
|
||||
'start_time': 3187,
|
||||
'end_time': 4009,
|
||||
'title': '6. Deny the Disease of Life',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=WjL4pSzog9w
|
||||
# pattern: 00:00 <title>
|
||||
'<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo',
|
||||
2705,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 428,
|
||||
'title': 'Christening Unborn Deformities',
|
||||
}, {
|
||||
'start_time': 428,
|
||||
'end_time': 976,
|
||||
'title': 'Taste of Purity',
|
||||
}, {
|
||||
'start_time': 976,
|
||||
'end_time': 1485,
|
||||
'title': 'Sculpting Sins of a Universal Tongue',
|
||||
}, {
|
||||
'start_time': 1485,
|
||||
'end_time': 1884,
|
||||
'title': 'Birth',
|
||||
}, {
|
||||
'start_time': 1884,
|
||||
'end_time': 2275,
|
||||
'title': 'Neves',
|
||||
}, {
|
||||
'start_time': 2275,
|
||||
'end_time': 2705,
|
||||
'title': 'Libations in Limbo',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=o3r1sn-t3is
|
||||
# pattern: <title> 00:00 <note>
|
||||
'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. 
<a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! 
<a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>',
|
||||
5640,
|
||||
[{
|
||||
'start_time': 45,
|
||||
'end_time': 266,
|
||||
'title': 'I-E-A-I-A-I-O',
|
||||
}, {
|
||||
'start_time': 266,
|
||||
'end_time': 331,
|
||||
'title': 'Suite-Pee (Incomplete)',
|
||||
}, {
|
||||
'start_time': 331,
|
||||
'end_time': 522,
|
||||
'title': 'Attack (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 522,
|
||||
'end_time': 752,
|
||||
'title': 'Prison Song',
|
||||
}, {
|
||||
'start_time': 752,
|
||||
'end_time': 932,
|
||||
'title': 'Know (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 932,
|
||||
'end_time': 1153,
|
||||
'title': 'Aerials',
|
||||
}, {
|
||||
'start_time': 1153,
|
||||
'end_time': 1209,
|
||||
'title': 'Soldier Side - Intro',
|
||||
}, {
|
||||
'start_time': 1209,
|
||||
'end_time': 1472,
|
||||
'title': 'B.Y.O.B.',
|
||||
}, {
|
||||
'start_time': 1472,
|
||||
'end_time': 1668,
|
||||
'title': 'Soil',
|
||||
}, {
|
||||
'start_time': 1668,
|
||||
'end_time': 1838,
|
||||
'title': 'Darts',
|
||||
}, {
|
||||
'start_time': 1838,
|
||||
'end_time': 2105,
|
||||
'title': 'Radio/Video',
|
||||
}, {
|
||||
'start_time': 2105,
|
||||
'end_time': 2288,
|
||||
'title': 'Hypnotize',
|
||||
}, {
|
||||
'start_time': 2288,
|
||||
'end_time': 2460,
|
||||
'title': 'Temper (First live performance since 1999)',
|
||||
}, {
|
||||
'start_time': 2460,
|
||||
'end_time': 2577,
|
||||
'title': 'CUBErt',
|
||||
}, {
|
||||
'start_time': 2577,
|
||||
'end_time': 2787,
|
||||
'title': 'Needles',
|
||||
}, {
|
||||
'start_time': 2787,
|
||||
'end_time': 2978,
|
||||
'title': 'Deer Dance',
|
||||
}, {
|
||||
'start_time': 2978,
|
||||
'end_time': 3085,
|
||||
'title': 'Bounce',
|
||||
}, {
|
||||
'start_time': 3085,
|
||||
'end_time': 3232,
|
||||
'title': 'Suggestions',
|
||||
}, {
|
||||
'start_time': 3232,
|
||||
'end_time': 3493,
|
||||
'title': 'Psycho',
|
||||
}, {
|
||||
'start_time': 3493,
|
||||
'end_time': 3675,
|
||||
'title': 'Chop Suey!',
|
||||
}, {
|
||||
'start_time': 3675,
|
||||
'end_time': 3854,
|
||||
'title': 'Lonely Day',
|
||||
}, {
|
||||
'start_time': 3854,
|
||||
'end_time': 4090,
|
||||
'title': 'Question!',
|
||||
}, {
|
||||
'start_time': 4090,
|
||||
'end_time': 4420,
|
||||
'title': 'Lost in Hollywood',
|
||||
}, {
|
||||
'start_time': 4420,
|
||||
'end_time': 4577,
|
||||
'title': 'Vicinity of Obscenity (First live performance since 2012)',
|
||||
}, {
|
||||
'start_time': 4577,
|
||||
'end_time': 4802,
|
||||
'title': 'Forest',
|
||||
}, {
|
||||
'start_time': 4802,
|
||||
'end_time': 5037,
|
||||
'title': 'Cigaro',
|
||||
}, {
|
||||
'start_time': 5037,
|
||||
'end_time': 5273,
|
||||
'title': 'Toxicity (with Chino Moreno)',
|
||||
}, {
|
||||
'start_time': 5273,
|
||||
'end_time': 5640,
|
||||
'title': 'Sugar',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=PkYLQbsqCE8
|
||||
# pattern: <num> - <title> [<latinized title>] 0:00:00
|
||||
'''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow 
noopener">https://web.facebook.com/AttackOfTheD...</a>''',
|
||||
1138,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 490,
|
||||
'title': '1 - Во прах [Vo prakh]',
|
||||
}, {
|
||||
'start_time': 490,
|
||||
'end_time': 870,
|
||||
'title': '2 - Искупление [Iskupleniye]',
|
||||
}, {
|
||||
'start_time': 870,
|
||||
'end_time': 1138,
|
||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
def test_youtube_chapters(self):
|
||||
for description, duration, expected_chapters in self._TEST_CASES:
|
||||
ie = YoutubeIE()
|
||||
expect_value(
|
||||
self, ie._extract_chapters(description, duration),
|
||||
expected_chapters, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -34,9 +34,12 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_thumbnail, dict):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data['@attributes']
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
|
||||
'url': self._proto_relative_url(thumbnail_url, 'http:'),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
@@ -47,9 +50,14 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_subtitle, dict):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data['@attributes']
|
||||
lang = subtitle.get('lang') or 'en'
|
||||
subtitles[lang] = [{'url': subtitle['href']}]
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
'url': subtitle_href,
|
||||
'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_content = get_media_node('content')
|
||||
|
||||
@@ -47,6 +47,7 @@ class BandcampIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
@@ -75,6 +76,7 @@ class BandcampIE(InfoExtractor):
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
@@ -143,7 +145,7 @@ class BandcampIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url'),
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
|
||||
@@ -122,6 +122,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
|
||||
@@ -9,7 +9,10 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
|
||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'average_rating': float,
|
||||
'duration': 39
|
||||
'duration': 39,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cda.pl/video/57413289',
|
||||
@@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
||||
data, content_type = multipart_encode(form_data)
|
||||
return self._download_webpage(
|
||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
||||
data=data, headers={
|
||||
'Referer': url,
|
||||
'Content-Type': content_type,
|
||||
}, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
@@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
|
||||
uploader = self._search_regex(r'''(?x)
|
||||
@@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
@@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
|
||||
for href, resolution in re.findall(
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
if need_confirm_age:
|
||||
handler = self._download_age_confirm_page
|
||||
else:
|
||||
handler = self._download_webpage
|
||||
|
||||
webpage = handler(
|
||||
self._BASE_URL + href, video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
@@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
|
||||
# invalid version is requested.
|
||||
self.report_warning('Unable to download %s version information' % resolution)
|
||||
continue
|
||||
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -245,6 +245,10 @@ class InfoExtractor(object):
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
chapters: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the chapter in seconds
|
||||
* "end_time" - The end time of the chapter in seconds
|
||||
* "title" (optional, string)
|
||||
|
||||
The following fields should only be used when the video belongs to some logical
|
||||
chapter or section:
|
||||
@@ -2170,7 +2174,7 @@ class InfoExtractor(object):
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
formats = []
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
hds_host = hosts.get('hds')
|
||||
if hds_host:
|
||||
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
|
||||
|
||||
@@ -24,12 +24,11 @@ class CoubIE(InfoExtractor):
|
||||
'duration': 4.6,
|
||||
'timestamp': 1428527772,
|
||||
'upload_date': '20150408',
|
||||
'uploader': 'Артём Лоскутников',
|
||||
'uploader': 'Artyom Loskutnikov',
|
||||
'uploader_id': 'artyom.loskutnikov',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
@@ -118,7 +117,6 @@ class CoubIE(InfoExtractor):
|
||||
view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
|
||||
like_count = int_or_none(coub.get('likes_count'))
|
||||
repost_count = int_or_none(coub.get('recoubs_count'))
|
||||
comment_count = int_or_none(coub.get('comments_count'))
|
||||
|
||||
age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
|
||||
if age_restricted is not None:
|
||||
@@ -137,7 +135,6 @@ class CoubIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'repost_count': repost_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -21,9 +21,10 @@ class CrackleIE(InfoExtractor):
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
'en-US': [
|
||||
{'ext': 'vtt'},
|
||||
{'ext': 'tt'},
|
||||
]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
|
||||
@@ -171,7 +171,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
@@ -179,7 +179,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
|
||||
@@ -50,6 +50,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||
'info_dict': {
|
||||
'id': 'x5kesuj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 187,
|
||||
'timestamp': 1493651285,
|
||||
'upload_date': '20170501',
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
@@ -66,7 +84,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# Vevo video
|
||||
{
|
||||
|
||||
@@ -21,7 +21,8 @@ class DemocracynowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Daily Show',
|
||||
'title': 'Daily Show for July 03, 2015',
|
||||
'description': 'md5:80eb927244d6749900de6072c7cc2c86',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
|
||||
|
||||
@@ -35,7 +35,7 @@ class DotsubIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
|
||||
'duration': 290,
|
||||
'timestamp': 1476767794.2809999,
|
||||
'upload_date': '20160525',
|
||||
'upload_date': '20161018',
|
||||
'uploader': 'parthivi001',
|
||||
'uploader_id': 'user52596202',
|
||||
'view_count': int,
|
||||
|
||||
@@ -20,7 +20,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
@@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': '17732',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
|
||||
@@ -1125,6 +1125,7 @@ from .vgtv import (
|
||||
from .vh1 import VH1IE
|
||||
from .vice import (
|
||||
ViceIE,
|
||||
ViceArticleIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
|
||||
@@ -11,10 +11,10 @@ class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'i0qKWsk3qJaM',
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
@@ -31,8 +31,9 @@ class FoxSportsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
|
||||
@@ -75,6 +75,19 @@ class GDCVaultIE(InfoExtractor):
|
||||
'format': 'jp', # The japanese audio
|
||||
}
|
||||
},
|
||||
{
|
||||
# gdc-player.html
|
||||
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
|
||||
'info_dict': {
|
||||
'id': '1435',
|
||||
'display_id': 'An-American-engine-in-Tokyo',
|
||||
'ext': 'flv',
|
||||
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
@@ -128,7 +141,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
|
||||
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
PLAYER_REGEX, start_page, 'xml root', default=None)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -8,15 +10,15 @@ from ..utils import (
|
||||
urlencode_postdata,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
urljoin,
|
||||
update_url_query,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class Laola1TvEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'laola1tv:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# flashvars.premium = "false";
|
||||
'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
|
||||
'info_dict': {
|
||||
@@ -26,7 +28,30 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
'uploader': 'ITTF - International Table Tennis Federation',
|
||||
'upload_date': '20161211',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||
return self._download_json(
|
||||
stream_access_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||
|
||||
def _extract_formats(self, token_url, video_id):
|
||||
token_doc = self._download_xml(
|
||||
token_url, video_id, 'Downloading token',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||
|
||||
if token_attrib['status'] != '0':
|
||||
raise ExtractorError(
|
||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -68,29 +93,16 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
else:
|
||||
data_abo = urlencode_postdata(
|
||||
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
|
||||
token_url = self._download_json(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
|
||||
video_id, query={
|
||||
stream_access_url = update_url_query(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
|
||||
'videoId': _v('id'),
|
||||
'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
|
||||
'label': _v('label'),
|
||||
'area': _v('area'),
|
||||
}, data=data_abo)['data']['stream-access'][0]
|
||||
})
|
||||
token_url = self._extract_token_url(stream_access_url, video_id, data_abo)
|
||||
|
||||
token_doc = self._download_xml(
|
||||
token_url, video_id, 'Downloading token',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||
|
||||
if token_attrib['status'] != '0':
|
||||
raise ExtractorError(
|
||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
|
||||
video_id)
|
||||
self._sort_formats(formats)
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
categories_str = _v('meta_sports')
|
||||
categories = categories_str.split(',') if categories_str else []
|
||||
@@ -107,7 +119,7 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(InfoExtractor):
|
||||
class Laola1TvIE(Laola1TvEmbedIE):
|
||||
IE_NAME = 'laola1tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@@ -164,13 +176,42 @@ class Laola1TvIE(InfoExtractor):
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
iframe_url = urljoin(url, self._search_regex(
|
||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||
webpage, 'iframe url'))
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id, js_to_json)
|
||||
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': iframe_url,
|
||||
'ie_key': 'Laola1TvEmbed',
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,7 +50,7 @@ class LeIE(InfoExtractor):
|
||||
'id': '1415246',
|
||||
'ext': 'mp4',
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
@@ -69,7 +68,6 @@ class LeIE(InfoExtractor):
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
'skip': 'Only available in China',
|
||||
}, {
|
||||
'url': 'http://sports.le.com/video/25737697.html',
|
||||
'only_matching': True,
|
||||
@@ -81,7 +79,7 @@ class LeIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in LetvPlayer.swf
|
||||
def ror(self, param1, param2):
|
||||
_loc3_ = 0
|
||||
while _loc3_ < param2:
|
||||
@@ -90,15 +88,8 @@ class LeIE(InfoExtractor):
|
||||
return param1
|
||||
|
||||
def calc_time_key(self, param1):
|
||||
_loc2_ = 773625421
|
||||
_loc3_ = self.ror(param1, _loc2_ % 13)
|
||||
_loc3_ = _loc3_ ^ _loc2_
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||
def get_mms_key(self, time):
|
||||
return self.ror(time, 8) ^ 185025305
|
||||
_loc2_ = 185025305
|
||||
return self.ror(param1, _loc2_ % 17) ^ _loc2_
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
@@ -122,7 +113,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
def _check_errors(self, play_json):
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
playstatus = play_json['msgs']['playstatus']
|
||||
if playstatus['status'] == 0:
|
||||
flag = playstatus['flag']
|
||||
if flag == 1:
|
||||
@@ -134,58 +125,31 @@ class LeIE(InfoExtractor):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
|
||||
play_json_h5 = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJsonH5',
|
||||
media_id, 'Downloading html5 playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 3,
|
||||
'splatid': 304,
|
||||
'format': 1,
|
||||
'tkey': self.get_mms_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'tss': 'no',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_h5)
|
||||
|
||||
play_json_flash = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
'http://player-pc.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading flash playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'source': 1000,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'region': 'cn',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_flash)
|
||||
|
||||
def get_h5_urls(media_url, format_id):
|
||||
location = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id, query={
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'tss': 'no',
|
||||
})['location']
|
||||
|
||||
return {
|
||||
'http': update_url_query(location, {'tss': 'no'}),
|
||||
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||
}
|
||||
|
||||
def get_flash_urls(media_url, format_id):
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
})
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
'Download JSON metadata for format %s' % format_id,
|
||||
query={
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'tss': 'ios',
|
||||
})
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
@@ -199,29 +163,28 @@ class LeIE(InfoExtractor):
|
||||
|
||||
extracted_formats = []
|
||||
formats = []
|
||||
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||
playurl = play_json['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
playurl = play_json_flash['msgs']['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
if format_id in extracted_formats:
|
||||
continue
|
||||
extracted_formats.append(format_id)
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
if format_id in extracted_formats:
|
||||
continue
|
||||
extracted_formats.append(format_id)
|
||||
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
'format_id': '%s-%s' % (protocol, format_id),
|
||||
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||
'quality': int_or_none(format_id),
|
||||
}
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_flash_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
'format_id': '%s-%s' % (protocol, format_id),
|
||||
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||
'quality': int_or_none(format_id),
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
f['height'] = int_or_none(format_id[:-1])
|
||||
if format_id[-1:] == 'p':
|
||||
f['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
formats.append(f)
|
||||
formats.append(f)
|
||||
self._sort_formats(formats, ('height', 'quality', 'format_id'))
|
||||
|
||||
publish_time = parse_iso8601(self._html_search_regex(
|
||||
|
||||
@@ -148,13 +148,34 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
# TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
points = data.get('shortIndexPoints')
|
||||
if isinstance(points, list):
|
||||
chapters = []
|
||||
for next_num, point in enumerate(points, start=1):
|
||||
if not isinstance(point, dict):
|
||||
continue
|
||||
start_time = parse_duration(point.get('startPoint'))
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = parse_duration(
|
||||
data.get('duration')
|
||||
if next_num == len(points)
|
||||
else points[next_num].get('startPoint'))
|
||||
if end_time is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': point.get('title'),
|
||||
})
|
||||
if chapters and len(entries) == 1:
|
||||
entries[0]['chapters'] = chapters
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,9 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
@@ -263,6 +265,13 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
|
||||
'info_dict': {
|
||||
'id': 'great-war',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
|
||||
'info_dict': {
|
||||
@@ -381,10 +390,10 @@ class PBSIE(InfoExtractor):
|
||||
# tabbed frontline videos
|
||||
MULTI_PART_REGEXES = (
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||
r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
|
||||
)
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
tabbed_videos = orderedSet(re.findall(p, webpage))
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date, description
|
||||
|
||||
@@ -464,6 +473,7 @@ class PBSIE(InfoExtractor):
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
|
||||
chapters = []
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
@@ -479,6 +489,20 @@ class PBSIE(InfoExtractor):
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
info = video_info
|
||||
if not chapters:
|
||||
for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
|
||||
chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
|
||||
if not chapter:
|
||||
continue
|
||||
start_time = float_or_none(chapter.get('start_time'), 1000)
|
||||
duration = float_or_none(chapter.get('duration'), 1000)
|
||||
if start_time is None or duration is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': start_time + duration,
|
||||
'title': chapter.get('title'),
|
||||
})
|
||||
|
||||
formats = []
|
||||
http_url = None
|
||||
@@ -588,4 +612,5 @@ class PBSIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
|
||||
(?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
@@ -97,6 +97,9 @@ class PornHubIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -13,21 +13,20 @@ class RMCDecouverteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE',
|
||||
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
|
||||
'info_dict': {
|
||||
'id': '5111223049001',
|
||||
'id': '5419055995001',
|
||||
'ext': 'mp4',
|
||||
'title': ': LES HEROS DU 88e ETAGE',
|
||||
'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.',
|
||||
'title': 'UN DELICIEUX PROJET',
|
||||
'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
|
||||
'uploader_id': '1969646226001',
|
||||
'upload_date': '20160904',
|
||||
'timestamp': 1472951103,
|
||||
'upload_date': '20170502',
|
||||
'timestamp': 1493745308,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
'skip': 'only available for a week',
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
|
||||
|
||||
@@ -35,5 +34,12 @@ class RMCDecouverteIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
if brightcove_legacy_url:
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||
brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
else:
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
|
||||
@@ -80,14 +80,33 @@ class ThePlatformBaseIE(OnceIE):
|
||||
'url': src,
|
||||
})
|
||||
|
||||
duration = info.get('duration')
|
||||
tp_chapters = info.get('chapters', [])
|
||||
chapters = []
|
||||
if tp_chapters:
|
||||
def _add_chapter(start_time, end_time):
|
||||
start_time = float_or_none(start_time, 1000)
|
||||
end_time = float_or_none(end_time, 1000)
|
||||
if start_time is None or end_time is None:
|
||||
return
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
})
|
||||
|
||||
for chapter in tp_chapters[:-1]:
|
||||
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
|
||||
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
|
||||
|
||||
return {
|
||||
'title': info['title'],
|
||||
'subtitles': subtitles,
|
||||
'description': info['description'],
|
||||
'thumbnail': info['defaultThumbnailUrl'],
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
'duration': float_or_none(duration, 1000),
|
||||
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
||||
'uploader': info.get('billingCode'),
|
||||
'chapters': chapters,
|
||||
}
|
||||
|
||||
def _extract_theplatform_metadata(self, path, video_id):
|
||||
|
||||
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ViceBaseIE(AdobePassIE):
|
||||
def _extract_preplay_video(self, url, webpage):
|
||||
def _extract_preplay_video(self, url, locale, webpage):
|
||||
watch_hub_data = extract_attributes(self._search_regex(
|
||||
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
|
||||
video_id = watch_hub_data['vms-id']
|
||||
@@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE):
|
||||
resource = self._get_mvpd_resource(
|
||||
'VICELAND', title, video_id,
|
||||
watch_hub_data.get('video-rating'))
|
||||
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
|
||||
query['tvetoken'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'VICELAND', resource)
|
||||
|
||||
# signature generation algorithm is reverse engineered from signatureGenerator in
|
||||
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
|
||||
@@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE):
|
||||
|
||||
try:
|
||||
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
|
||||
preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
|
||||
preplay = self._download_json(
|
||||
'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
|
||||
video_id, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
error = json.loads(e.cause.read().decode())
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, error['details']), expected=True)
|
||||
raise
|
||||
|
||||
video_data = preplay['video']
|
||||
@@ -88,41 +92,30 @@ class ViceBaseIE(AdobePassIE):
|
||||
|
||||
|
||||
class ViceIE(ViceBaseIE):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
|
||||
IE_NAME = 'vice'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'md5': 'e9d77741f9e42ba583e683cd170660f7',
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
|
||||
'ext': 'flv',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
'title': 'Monkey Labs of Holland',
|
||||
'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.vice.com/video/how-to-hack-a-car',
|
||||
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
|
||||
'info_dict': {
|
||||
'id': '3jstaBeXgAs',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
|
||||
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
|
||||
'uploader_id': 'MotherboardTV',
|
||||
'uploader': 'Motherboard',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '5816510690b70e6c5fd39a56',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Waypoint',
|
||||
'title': 'The Signal From Tölva',
|
||||
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
|
||||
'uploader_id': '57f7d621e05ca860fa9ccaf9',
|
||||
'timestamp': 1477941983938,
|
||||
'timestamp': 1477941983,
|
||||
'upload_date': '20161031',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -130,19 +123,31 @@ class ViceIE(ViceBaseIE):
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
|
||||
'info_dict': {
|
||||
'id': '581b12b60a0e1f4c0fb6ea2f',
|
||||
'ext': 'mp4',
|
||||
'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
|
||||
'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
|
||||
'uploader': 'VICE',
|
||||
'uploader_id': '57a204088cb727dec794c67b',
|
||||
'timestamp': 1485368119,
|
||||
'upload_date': '20170125',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {
|
||||
# AES-encrypted m3u8
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
}, {
|
||||
'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
|
||||
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PREPLAY_HOST = 'video.vice'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
locale, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
@@ -153,10 +158,11 @@ class ViceIE(ViceBaseIE):
|
||||
r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
|
||||
if youtube_id:
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
return self._extract_preplay_video(urlh.geturl(), webpage)
|
||||
return self._extract_preplay_video(urlh.geturl(), locale, webpage)
|
||||
|
||||
|
||||
class ViceShowIE(InfoExtractor):
|
||||
IE_NAME = 'vice:show'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -183,6 +189,86 @@ class ViceShowIE(InfoExtractor):
|
||||
r'<title>(.+?)</title>', webpage, 'title', default=None)
|
||||
if title:
|
||||
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
return self.playlist_result(entries, show_id, title, description)
|
||||
|
||||
|
||||
class ViceArticleIE(InfoExtractor):
    """Extractor for www.vice.com article pages that embed a video.

    The article page carries a ``window.__PREFETCH_DATA`` JSON object; the
    embedded video is located inside it and delegated to the Ooyala, Youtube
    or Vice extractor through a ``url_transparent`` result.
    """

    IE_NAME = 'vice:article'
    # The dots of the host name are escaped so that only the literal
    # www.vice.com host is accepted (an unescaped '.' matches any character).
    _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
        'info_dict': {
            'id': '58dc0a3dee202d2a0ccfcbd8',
            'ext': 'mp4',
            'title': 'Mormon War on Porn ',
            'description': 'md5:ad396a2481e7f8afb5ed486878421090',
            'uploader': 'VICE',
            'uploader_id': '57a204088cb727dec794c693',
            'timestamp': 1489160690,
            'upload_date': '20170310',
        },
        'params': {
            # AES-encrypted m3u8
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }, {
        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
        'info_dict': {
            'id': '3jstaBeXgAs',
            'ext': 'mp4',
            'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
            'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
            'uploader_id': 'MotherboardTV',
            'uploader': 'Motherboard',
            'upload_date': '20140529',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve an article URL to the video it embeds.

        Tries, in order: an Ooyala embed code in the article body, a YouTube
        iframe in the article body, and finally the ``data-video-url``
        attribute of the prefetch data's ``embed_code`` entry (handled by
        ViceIE). The last lookup is mandatory, so extraction fails if none
        of the three is present.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        prefetch_data = self._parse_json(self._search_regex(
            r'window\.__PREFETCH_DATA\s*=\s*({.*});',
            webpage, 'prefetch data'), display_id)
        body = prefetch_data['body']

        def _url_res(video_url, ie_key):
            # url_transparent result that carries the article slug along
            # as display_id while the named extractor does the real work.
            return {
                '_type': 'url_transparent',
                'url': video_url,
                'display_id': display_id,
                'ie_key': ie_key,
            }

        embed_code = self._search_regex(
            r'embedCode=([^&\'"]+)', body,
            'ooyala embed code', default=None)
        if embed_code:
            return _url_res('ooyala:%s' % embed_code, 'Ooyala')

        # [^"]* instead of a greedy .* keeps the capture inside the src
        # attribute; .* would run on to the last double quote on the line
        # and swallow any attributes that follow src.
        youtube_url = self._html_search_regex(
            r'<iframe[^>]+src="([^"]*youtube\.com/[^"]*)"',
            body, 'YouTube URL', default=None)
        if youtube_url:
            return _url_res(youtube_url, 'Youtube')

        video_url = self._html_search_regex(
            r'data-video-url="([^"]+)"',
            prefetch_data['embed_code'], 'video URL')

        return _url_res(video_url, ViceIE.ie_key())
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .vice import ViceBaseIE
|
||||
|
||||
|
||||
class VicelandIE(ViceBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
|
||||
'info_dict': {
|
||||
@@ -24,10 +26,13 @@ class VicelandIE(ViceBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['UplynkPreplay'],
|
||||
'skip': '404',
|
||||
}
|
||||
_PREPLAY_HOST = 'www.viceland'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
locale = mobj.group('locale')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_preplay_video(url, webpage)
|
||||
return self._extract_preplay_video(url, locale, webpage)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,6 +10,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
random_birthday,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
@@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||
video = self._download_json(
|
||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||
video_id, query={
|
||||
'birth_month': random.randint(1, 12),
|
||||
'birth_day': random.randint(1, 31),
|
||||
'birth_year': random.randint(1950, 1995),
|
||||
})
|
||||
video_id, query=query)
|
||||
|
||||
title = video['title']
|
||||
|
||||
|
||||
@@ -38,7 +38,6 @@ from ..utils import (
|
||||
parse_duration,
|
||||
remove_quotes,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
try_get,
|
||||
@@ -54,7 +53,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
||||
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
|
||||
|
||||
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
|
||||
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
|
||||
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
|
||||
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
@@ -96,72 +99,150 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'checkConnection': 'youtube',
|
||||
'Email': username,
|
||||
'Passwd': password,
|
||||
})
|
||||
|
||||
login_results = self._download_webpage(
|
||||
self._PASSWORD_CHALLENGE_URL, None,
|
||||
note='Logging in', errnote='unable to log in', fatal=False,
|
||||
data=urlencode_postdata(login_form))
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
|
||||
login_results, 'error message', default=None)
|
||||
if error_msg:
|
||||
raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
|
||||
|
||||
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
|
||||
# Two-Factor
|
||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||
|
||||
if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
|
||||
tfa_code = self._get_tfa_info('2-step verification code')
|
||||
|
||||
if not tfa_code:
|
||||
self._downloader.report_warning(
|
||||
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
|
||||
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
return False
|
||||
|
||||
tfa_code = remove_start(tfa_code, 'G-')
|
||||
|
||||
tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
|
||||
|
||||
tfa_form_strs.update({
|
||||
'Pin': tfa_code,
|
||||
'TrustDevice': 'on',
|
||||
def req(url, f_req, note, errnote):
|
||||
data = login_form.copy()
|
||||
data.update({
|
||||
'pstMsg': 1,
|
||||
'checkConnection': 'youtube',
|
||||
'checkedDomains': 'youtube',
|
||||
'hl': 'en',
|
||||
'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
|
||||
'f.req': json.dumps(f_req),
|
||||
'flowName': 'GlifWebSignIn',
|
||||
'flowEntry': 'ServiceLogin',
|
||||
})
|
||||
return self._download_json(
|
||||
url, None, note=note, errnote=errnote,
|
||||
transform_source=lambda s: re.sub(r'^[^[]*', '', s),
|
||||
fatal=False,
|
||||
data=urlencode_postdata(data), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
|
||||
'Google-Accounts-XSRF': 1,
|
||||
})
|
||||
|
||||
tfa_data = urlencode_postdata(tfa_form_strs)
|
||||
def warn(message):
|
||||
self._downloader.report_warning(message)
|
||||
|
||||
tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
|
||||
tfa_results = self._download_webpage(
|
||||
tfa_req, None,
|
||||
note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
|
||||
lookup_req = [
|
||||
username,
|
||||
None, [], None, 'US', None, None, 2, False, True,
|
||||
[
|
||||
None, None,
|
||||
[2, 1, None, 1,
|
||||
'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
|
||||
None, [], 4],
|
||||
1, [None, None, []], None, None, None, True
|
||||
],
|
||||
username,
|
||||
]
|
||||
|
||||
if tfa_results is False:
|
||||
return False
|
||||
lookup_results = req(
|
||||
self._LOOKUP_URL, lookup_req,
|
||||
'Looking up account info', 'Unable to look up account info')
|
||||
|
||||
if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
|
||||
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
||||
return False
|
||||
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
|
||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||
return False
|
||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
if lookup_results is False:
|
||||
return False
|
||||
|
||||
user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
|
||||
if not user_hash:
|
||||
warn('Unable to extract user hash')
|
||||
return False
|
||||
|
||||
challenge_req = [
|
||||
user_hash,
|
||||
None, 1, None, [1, None, None, None, [password, None, True]],
|
||||
[
|
||||
None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
|
||||
1, [None, None, []], None, None, None, True
|
||||
]]
|
||||
|
||||
challenge_results = req(
|
||||
self._CHALLENGE_URL, challenge_req,
|
||||
'Logging in', 'Unable to log in')
|
||||
|
||||
if challenge_results is False:
|
||||
return
|
||||
|
||||
login_res = try_get(challenge_results, lambda x: x[0][5], list)
|
||||
if login_res:
|
||||
login_msg = try_get(login_res, lambda x: x[5], compat_str)
|
||||
warn(
|
||||
'Unable to login: %s' % 'Invalid password'
|
||||
if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
|
||||
return False
|
||||
|
||||
res = try_get(challenge_results, lambda x: x[0][-1], list)
|
||||
if not res:
|
||||
warn('Unable to extract result entry')
|
||||
return False
|
||||
|
||||
tfa = try_get(res, lambda x: x[0][0], list)
|
||||
if tfa:
|
||||
tfa_str = try_get(tfa, lambda x: x[2], compat_str)
|
||||
if tfa_str == 'TWO_STEP_VERIFICATION':
|
||||
# SEND_SUCCESS - TFA code has been successfully sent to phone
|
||||
# QUOTA_EXCEEDED - reached the limit of TFA codes
|
||||
status = try_get(tfa, lambda x: x[5], compat_str)
|
||||
if status == 'QUOTA_EXCEEDED':
|
||||
warn('Exceeded the limit of TFA codes, try later')
|
||||
return False
|
||||
|
||||
tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
|
||||
if not tl:
|
||||
warn('Unable to extract TL')
|
||||
return False
|
||||
|
||||
tfa_code = self._get_tfa_info('2-step verification code')
|
||||
|
||||
if not tfa_code:
|
||||
warn(
|
||||
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
|
||||
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
return False
|
||||
|
||||
tfa_code = remove_start(tfa_code, 'G-')
|
||||
|
||||
tfa_req = [
|
||||
user_hash, None, 2, None,
|
||||
[
|
||||
9, None, None, None, None, None, None, None,
|
||||
[None, tfa_code, True, 2]
|
||||
]]
|
||||
|
||||
tfa_results = req(
|
||||
self._TFA_URL.format(tl), tfa_req,
|
||||
'Submitting TFA code', 'Unable to submit TFA code')
|
||||
|
||||
if tfa_results is False:
|
||||
return False
|
||||
|
||||
tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
|
||||
if tfa_res:
|
||||
tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
|
||||
warn(
|
||||
'Unable to finish TFA: %s' % 'Invalid TFA code'
|
||||
if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
|
||||
return False
|
||||
|
||||
check_cookie_url = try_get(
|
||||
tfa_results, lambda x: x[0][-1][2], compat_str)
|
||||
else:
|
||||
check_cookie_url = try_get(res, lambda x: x[2], compat_str)
|
||||
|
||||
if not check_cookie_url:
|
||||
warn('Unable to extract CheckCookie URL')
|
||||
return False
|
||||
|
||||
check_cookie_results = self._download_webpage(
|
||||
check_cookie_url, None, 'Checking cookie', fatal=False)
|
||||
|
||||
if check_cookie_results is False:
|
||||
return False
|
||||
|
||||
if 'https://myaccount.google.com/' not in check_cookie_results:
|
||||
warn('Unable to log in')
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -1257,6 +1338,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||
|
||||
@staticmethod
def _extract_chapters(description, duration):
    """Build a chapter list from seek links found in an HTML description.

    A chapter line is a run of description text, delimited by the start or
    end of the string or by ``<br />`` tags, that contains an ``<a>`` element
    whose ``onclick`` calls ``yt.www.watch.player.seekTo`` and whose text is
    a ``MM:SS`` or ``HH:MM:SS`` time point.

    description: HTML of the video description (may be empty or None).
    duration: value used as the end time of the last chapter.

    Returns None when there is no description or no chapter line was found;
    otherwise a list of dicts with 'start_time', 'end_time' and 'title'.
    Lines whose start or end time cannot be determined are left out.
    """
    if not description:
        return None

    matches = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not matches:
        return None

    total = len(matches)
    chapters = []
    for index, (line, time_point) in enumerate(matches):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue

        # A chapter ends where the next one starts; the last chapter runs
        # until the end of the video.
        if index + 1 == total:
            end_time = duration
        else:
            end_time = parse_duration(matches[index + 1][1])
        if end_time is None:
            continue

        # The title is the line without its seek link, trimmed of
        # surrounding spaces, tabs and dashes, with whitespace collapsed.
        title = re.sub(r'<a[^>]+>[^<]+</a>', '', line).strip(' \t-')
        title = re.sub(r'\s+', ' ', title)

        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
@@ -1399,9 +1509,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_title = '_'
|
||||
|
||||
# description
|
||||
video_description = get_element_by_id("eow-description", video_webpage)
|
||||
description_original = video_description = get_element_by_id("eow-description", video_webpage)
|
||||
if video_description:
|
||||
video_description = re.sub(r'''(?x)
|
||||
description_original = video_description = re.sub(r'''(?x)
|
||||
<a\s+
|
||||
(?:[a-zA-Z-]+="[^"]*"\s+)*?
|
||||
(?:title|href)="([^"]+)"\s+
|
||||
@@ -1558,6 +1668,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
video_annotations = self._extract_annotations(video_id)
|
||||
|
||||
chapters = self._extract_chapters(description_original, video_duration)
|
||||
|
||||
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
||||
self.report_rtmp_download()
|
||||
formats = [{
|
||||
@@ -1790,6 +1902,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'chapters': chapters,
|
||||
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
|
||||
@@ -4,6 +4,7 @@ import io
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import re
|
||||
|
||||
|
||||
from .common import AudioConversionError, PostProcessor
|
||||
@@ -22,6 +23,7 @@ from ..utils import (
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
ISO639Utils,
|
||||
replace_extension,
|
||||
)
|
||||
|
||||
|
||||
@@ -429,17 +431,40 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
|
||||
filename = info['filepath']
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
in_filenames = [filename]
|
||||
options = []
|
||||
|
||||
if info['ext'] == 'm4a':
|
||||
options = ['-vn', '-acodec', 'copy']
|
||||
options.extend(['-vn', '-acodec', 'copy'])
|
||||
else:
|
||||
options = ['-c', 'copy']
|
||||
options.extend(['-c', 'copy'])
|
||||
|
||||
for (name, value) in metadata.items():
|
||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||
|
||||
chapters = info.get('chapters', [])
|
||||
if chapters:
|
||||
metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
|
||||
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
||||
def ffmpeg_escape(text):
|
||||
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
||||
|
||||
metadata_file_content = ';FFMETADATA1\n'
|
||||
for chapter in chapters:
|
||||
metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
|
||||
metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
|
||||
metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
|
||||
chapter_title = chapter.get('title')
|
||||
if chapter_title:
|
||||
metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
|
||||
f.write(metadata_file_content)
|
||||
in_filenames.append(metadata_filename)
|
||||
options.extend(['-map_metadata', '1'])
|
||||
|
||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
|
||||
if chapters:
|
||||
os.remove(metadata_filename)
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
return [], info
|
||||
|
||||
@@ -11,6 +11,7 @@ import contextlib
|
||||
import ctypes
|
||||
import datetime
|
||||
import email.utils
|
||||
import email.header
|
||||
import errno
|
||||
import functools
|
||||
import gzip
|
||||
@@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||
return new_req
|
||||
|
||||
|
||||
def _multipart_encode_impl(data, boundary):
    """Serialize *data* as a multipart/form-data body delimited by *boundary*.

    data:
        A dict whose keys (field names) and values are either text strings,
        which are encoded as UTF-8, or bytes, which are used as-is.
    boundary:
        The boundary as a text string; it must be ASCII-encodable.

    Returns a ``(body, content_type)`` tuple, where body is bytes and
    content_type is the matching Content-Type header value.

    Raises ValueError when the boundary occurs inside any encoded part,
    since the resulting body would be ambiguous.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    # Encode the boundary once instead of on every iteration.
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    # Collect the pieces and join them once at the end rather than growing
    # a bytes object with repeated concatenation.
    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(content)

    # Closing delimiter.
    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
|
||||
|
||||
|
||||
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        A Unicode object to use as the boundary. When it is not given,
        random boundaries are generated until one does not clash with
        the data.

    Returns a (body, content_type) tuple. ValueError is raised only when
    an explicitly given boundary overlaps with the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-chosen boundary gets exactly one attempt; an overlap
        # with the data propagates to the caller as ValueError.
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary occurs in the data; draw another one.
            continue
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
@@ -3760,3 +3813,11 @@ def write_xattr(path, key, value):
|
||||
"Couldn't find a tool to set the xattrs. "
|
||||
"Install either the python 'xattr' module, "
|
||||
"or the 'xattr' binary.")
|
||||
|
||||
|
||||
def random_birthday(year_field, month_field, day_field):
    """Return a random birth date as a dict of form-field name -> string.

    year_field, month_field, day_field:
        The keys under which the year, month and day are stored.

    The date lies between 1950-01-01 and 1995-12-31 inclusive, and every
    value is the decimal string of the corresponding number.
    """
    # Draw a single calendar day from the range rather than picking year,
    # month and day independently: independent draws can produce dates
    # that do not exist, such as 31 February.
    first_day = datetime.date(1950, 1, 1).toordinal()
    last_day = datetime.date(1995, 12, 31).toordinal()
    birthday = datetime.date.fromordinal(random.randint(first_day, last_day))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.05.01'
|
||||
__version__ = '2017.05.07'
|
||||
|
||||
Reference in New Issue
Block a user