Compare commits

...

26 Commits

Author SHA1 Message Date
806498cf2f release 2017.09.10 2017-09-10 22:16:55 +07:00
b98339b54b [ChangeLog] Actualize 2017-09-10 22:15:55 +07:00
bf6ec2fea9 [fox] Fix extraction (#14147) 2017-09-10 22:08:32 +07:00
c3dd44e085 [rutube] Use bool_or_none 2017-09-10 19:09:27 +07:00
c7e327c4d4 [utils] Introduce bool_or_none 2017-09-10 19:08:39 +07:00
48b813748d [rutube] Rework and generalize playlist extractors (closes #13565) 2017-09-10 18:40:33 +07:00
debed8d759 [rutube:playlist] Add extractor (closes #13534) 2017-09-10 18:40:33 +07:00
51aee72d16 [README.md] Clarify how to run extractor specific test cases 2017-09-08 22:13:17 +07:00
931edb2ada [radiocanada] Add fallback for title extraction 2017-09-08 21:53:24 +07:00
5113b69124 [abcnews,chilloutsoze,cracked,vice,vk] Use dedicated YouTube embeds extraction routines 2017-09-06 00:50:25 +07:00
66c9fa36c1 [youtube] Separate methods for embeds extraction 2017-09-06 00:48:37 +07:00
c5c9bf0c12 [YoutubeDL] Ensure dir existence for each requested format (closes #14116) 2017-09-05 23:31:34 +07:00
880fa66f4f [redtube] Fix formats extraction (closes #14122) 2017-09-05 22:45:49 +07:00
6348671c4a [arte] Relax unavailability check (closes #14112) 2017-09-04 23:08:40 +07:00
efc57145c1 [manyvids] Improve (closes #14059) 2017-09-03 17:32:23 +07:00
e9b865267a [manyvids] Add support for preview videos (closes #14053) 2017-09-03 17:31:53 +07:00
bc35f07537 [vidme:user] Make tests only matching (closes #14054) 2017-09-03 17:03:51 +07:00
0b4a8eb3ac [vidme:user] Relax _VALID_URLs 2017-09-03 17:03:45 +07:00
c1c1585b31 [bpb] Improve (closes #14086) 2017-09-03 16:43:33 +07:00
0cbb841ba9 [bpb] Fix extraction (closes #14043) 2017-09-03 16:39:12 +07:00
d7c7100e3d [soundcloud] Simplify and add test (closes #14093) 2017-09-03 16:29:58 +07:00
73602bcd0c [soundcloud] Fix download URL with private tracks 2017-09-03 16:28:34 +07:00
23b2df82c7 [aliexpress:live] Fix issues (closes #13698, closes #13707) 2017-09-03 16:05:31 +07:00
503115540d [aliexpress:live] Add extractor 2017-09-03 16:05:00 +07:00
64f0e30b93 [viidea] Capture and output lecture error message (#14099) 2017-09-02 15:44:49 +07:00
a3431e1224 [radiocanada] Skip unsupported platforms (closes #14100) 2017-09-02 15:33:54 +07:00
27 changed files with 559 additions and 199 deletions

View File

@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.10**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.09.02
+[debug] youtube-dl version 2017.09.10
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
    python test/test_download.py
    nosetests

+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
If you want to create a build of youtube-dl yourself, you'll need

* python
@@ -149,7 +151,7 @@ After you have ensured this site is distributing its content legally, you can fo
    }
    ```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

View File

@@ -1,3 +1,32 @@
version 2017.09.10
Core
+ [utils] Introduce bool_or_none
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
Extractors
* [fox] Fix extraction (#14147)
* [rutube] Use bool_or_none
* [rutube] Rework and generalize playlist extractors (#13565)
+ [rutube:playlist] Add support for playlists (#13534, #13565)
+ [radiocanada] Add fallback for title extraction (#14145)
* [vk] Use dedicated YouTube embeds extraction routine
* [vice] Use dedicated YouTube embeds extraction routine
* [cracked] Use dedicated YouTube embeds extraction routine
* [chilloutzone] Use dedicated YouTube embeds extraction routine
* [abcnews] Use dedicated YouTube embeds extraction routine
* [youtube] Separate methods for embeds extraction
* [redtube] Fix formats extraction (#14122)
* [arte] Relax unavailability check (#14112)
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
* [vidme:user] Relax URL regular expression (#14054)
* [bpb] Fix extraction (#14043, #14086)
* [soundcloud] Fix download URL with private tracks (#14093)
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
* [viidea] Capture and output lecture error message (#14099)
* [radiocanada] Skip unsupported platforms (#14100)
version 2017.09.02
Extractors

View File

@@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
    python test/test_download.py
    nosetests

+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
If you want to create a build of youtube-dl yourself, you'll need

* python
@@ -1003,7 +1005,7 @@ After you have ensured this site is distributing its content legally, you can fo
    }
    ```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

View File

@@ -38,6 +38,7 @@
- **afreecatv**: afreecatv.com
- **afreecatv:global**: afreecatv.com
- **AirMozilla**
+- **AliExpressLive**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
@@ -437,6 +438,7 @@
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
+- **ManyVids**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@@ -701,6 +703,7 @@
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
+- **rutube:playlist**: Rutube playlists
- **RUTV**: RUTV.RU
- **Ruutu**
- **Ruv**

View File

@@ -1710,12 +1710,17 @@ class YoutubeDL(object):
        if filename is None:
            return

-        try:
-            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
-            if dn and not os.path.exists(dn):
-                os.makedirs(dn)
-        except (OSError, IOError) as err:
-            self.report_error('unable to create directory ' + error_to_compat_str(err))
+        def ensure_dir_exists(path):
+            try:
+                dn = os.path.dirname(path)
+                if dn and not os.path.exists(dn):
+                    os.makedirs(dn)
+                return True
+            except (OSError, IOError) as err:
+                self.report_error('unable to create directory ' + error_to_compat_str(err))
+                return False
+
+        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        if self.params.get('writedescription', False):
@@ -1853,8 +1858,11 @@ class YoutubeDL(object):
                for f in requested_formats:
                    new_info = dict(info_dict)
                    new_info.update(f)
-                    fname = self.prepare_filename(new_info)
-                    fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+                    fname = prepend_extension(
+                        self.prepare_filename(new_info),
+                        'f%s' % f['format_id'], new_info['ext'])
+                    if not ensure_dir_exists(fname):
+                        return
                    downloaded.append(fname)
                    partial_success = dl(fname, new_info)
                    success = success and partial_success

View File

@@ -7,6 +7,7 @@ import time

from .amp import AMPIE
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import compat_urlparse
@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        full_video_url = compat_urlparse.urljoin(url, video_url)

-        youtube_url = self._html_search_regex(
-            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
-            webpage, 'YouTube URL', default=None)
+        youtube_url = YoutubeIE._extract_url(webpage)

        timestamp = None
        date_str = self._html_search_regex(
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
        }

        if youtube_url:
-            entries = [entry, self.url_result(youtube_url, 'Youtube')]
+            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
            return self.playlist_result(entries)

        return entry

View File

@@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    float_or_none,
    try_get,
)


class AliExpressLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
    _TEST = {
        'url': 'https://live.aliexpress.com/live/2800002704436634',
        'md5': 'e729e25d47c5e557f2630eaf99b740a5',
        'info_dict': {
            'id': '2800002704436634',
            'ext': 'mp4',
            'title': 'CASIMA7.22',
            'thumbnail': r're:http://.*\.jpg',
            'uploader': 'CASIMA Official Store',
            'timestamp': 1500717600,
            'upload_date': '20170722',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data = self._parse_json(
            self._search_regex(
                r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
                webpage, 'runParams'),
            video_id)

        title = data['title']

        formats = self._extract_m3u8_formats(
            data['replyStreamUrl'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': data.get('coverUrl'),
            'uploader': try_get(
                data, lambda x: x['followBar']['name'], compat_str),
            'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
            'formats': formats,
        }
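The `startTimeLong` field in `runParams` is a millisecond value, which is why the extractor passes `scale=1000` to `float_or_none`. A quick sanity check against the metadata in the test case above — a sketch only, assuming this version of youtube-dl is importable and that the raw field was the millisecond equivalent of the test's timestamp:

```python
from datetime import datetime

from youtube_dl.utils import float_or_none

# startTimeLong comes back in milliseconds; scale=1000 converts it to the
# Unix timestamp youtube-dl expects (values implied by the test case above).
timestamp = float_or_none(1500717600000, scale=1000)
assert timestamp == 1500717600.0
assert datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d') == '20170722'
```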

View File

@@ -82,7 +82,7 @@ class ArteTVBaseIE(InfoExtractor):
        vsr = player_info['VSR']

-        if not vsr and not player_info.get('VRU'):
+        if not vsr:
            raise ExtractorError(
                'Video %s is not available' % player_info.get('VID') or video_id,
                expected=True)

View File

@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
        title = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', webpage, 'title')
        video_info_dicts = re.findall(
-            r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
+            r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)

        formats = []
        for video_info in video_info_dicts:
-            video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
-            quality = video_info['quality']
-            video_url = video_info['src']
+            video_info = self._parse_json(
+                video_info, video_id, transform_source=js_to_json, fatal=False)
+            if not video_info:
+                continue
+            video_url = video_info.get('src')
+            if not video_url:
+                continue
+            quality = 'high' if '_high' in video_url else 'low'
            formats.append({
                'url': video_url,
                'preference': 10 if quality == 'high' else 0,

View File

@@ -5,6 +5,7 @@ import base64
import json

from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
    clean_html,
    ExtractorError
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):

        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
        if native_platform is None:
-            youtube_url = self._html_search_regex(
-                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
-                webpage, 'fallback video URL', default=None)
-            if youtube_url is not None:
-                return self.url_result(youtube_url, ie='Youtube')
+            youtube_url = YoutubeIE._extract_url(webpage)
+            if youtube_url:
+                return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
        # the own CDN

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
    parse_iso8601,
    str_to_int,
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        youtube_url = self._search_regex(
-            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
-            webpage, 'youtube url', default=None)
+        youtube_url = YoutubeIE._extract_url(webpage)
        if youtube_url:
-            return self.url_result(youtube_url, 'Youtube')
+            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

        video_url = self._html_search_regex(
            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],

View File

@@ -45,6 +45,7 @@ from .anvato import AnvatoIE
from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
@@ -563,6 +564,7 @@ from .mangomolo import (
    MangomoloVideoIE,
    MangomoloLiveIE,
)
+from .manyvids import ManyVidsIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
@@ -897,6 +899,7 @@ from .rutube import (
    RutubeEmbedIE,
    RutubeMovieIE,
    RutubePersonIE,
+    RutubePlaylistIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE

View File

@@ -3,56 +3,99 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
-    smuggle_url,
-    update_url_query,
+    int_or_none,
+    parse_age_limit,
+    parse_duration,
+    try_get,
+    unified_timestamp,
)


class FOXIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://www.fox.com/watch/255180355939/7684182528',
+    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+    _TESTS = [{
+        # clip
+        'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
        'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
        'info_dict': {
-            'id': '255180355939',
+            'id': '4b765a60490325103ea69888fb2bd4e8',
            'ext': 'mp4',
-            'title': 'Official Trailer: Gotham',
-            'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
-            'duration': 129,
-            'timestamp': 1400020798,
-            'upload_date': '20140513',
-            'uploader': 'NEWA-FNG-FOXCOM',
+            'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+            'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
+            'duration': 102,
+            'timestamp': 1504291893,
+            'upload_date': '20170901',
+            'creator': 'FOX',
+            'series': 'Gotham',
        },
-        'add_ie': ['ThePlatform'],
-    }
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # episode, geo-restricted
+        'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
+        'only_matching': True,
+    }, {
+        # episode, geo-restricted, tv provided required
+        'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        settings = self._parse_json(self._search_regex(
-            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-            webpage, 'drupal settings'), video_id)
-        fox_pdk_player = settings['fox_pdk_player']
-        release_url = fox_pdk_player['release_url']
-        query = {
-            'mbr': 'true',
-            'switch': 'http'
-        }
-        if fox_pdk_player.get('access') == 'locked':
-            ap_p = settings['foxAdobePassProvider']
-            rating = ap_p.get('videoRating')
-            if rating == 'n/a':
-                rating = None
-            resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
-            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
-
-        info = self._search_json_ld(webpage, video_id, fatal=False)
-        info.update({
-            '_type': 'url_transparent',
-            'ie_key': 'ThePlatform',
-            'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
-            'id': video_id,
-        })
-
-        return info
+
+        video = self._download_json(
+            'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
+            video_id, headers={
+                'apikey': 'abdcbed02c124d393b39e818a4312055',
+                'Content-Type': 'application/json',
+                'Referer': url,
+            })
+
+        title = video['name']
+
+        m3u8_url = self._download_json(
+            video['videoRelease']['url'], video_id)['playURL']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        description = video.get('description')
+        duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+            video.get('duration')) or parse_duration(video.get('duration'))
+        timestamp = unified_timestamp(video.get('datePublished'))
+        age_limit = parse_age_limit(video.get('contentRating'))
+
+        data = try_get(
+            video, lambda x: x['trackingData']['properties'], dict) or {}
+
+        creator = data.get('brand') or data.get('network') or video.get('network')
+
+        series = video.get('seriesName') or data.get(
+            'seriesName') or data.get('show')
+        season_number = int_or_none(video.get('seasonNumber'))
+        episode = video.get('name')
+        episode_number = int_or_none(video.get('episodeNumber'))
+        release_year = int_or_none(video.get('releaseYear'))
+
+        if data.get('authRequired'):
+            # TODO: AP
+            pass
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'age_limit': age_limit,
+            'creator': creator,
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+            'release_year': release_year,
+            'formats': formats,
+        }

View File

@@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
        if vid_me_embed_url is not None:
            return self.url_result(vid_me_embed_url, 'Vidme')

-        # Look for embedded YouTube player
-        matches = re.findall(r'''(?x)
-            (?:
-                <iframe[^>]+?src=|
-                data-video-url=|
-                <embed[^>]+?src=|
-                embedSWF\(?:\s*|
-                <object[^>]+data=|
-                new\s+SWFObject\(
-            )
-            (["\'])
-            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-            (?:embed|v|p)/.+?)
-            \1''', webpage)
-        if matches:
+        # Look for YouTube embeds
+        youtube_urls = YoutubeIE._extract_urls(webpage)
+        if youtube_urls:
            return self.playlist_from_matches(
-                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
-        # Look for lazyYT YouTube embed
-        matches = re.findall(
-            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
-        # Look for Wordpress "YouTube Video Importer" plugin
-        matches = re.findall(r'''(?x)<div[^>]+
-            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
-            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())

        matches = DailymotionIE._extract_urls(webpage)
        if matches:

View File

@@ -0,0 +1,48 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


class ManyVidsIE(InfoExtractor):
    _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
        'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
        'info_dict': {
            'id': '133957',
            'ext': 'mp4',
            'title': 'everthing about me (Preview)',
            'view_count': int,
            'like_count': int,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video URL', group='url')

        title = '%s (Preview)' % self._html_search_regex(
            r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')

        like_count = int_or_none(self._search_regex(
            r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
        view_count = int_or_none(self._html_search_regex(
            r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
            'view count', default=None))

        return {
            'id': video_id,
            'title': title,
            'view_count': view_count,
            'like_count': like_count,
            'formats': [{
                'url': video_url,
            }],
        }

View File

@@ -20,7 +20,8 @@ from ..utils import (
class RadioCanadaIE(InfoExtractor):
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [
+        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
@@ -29,11 +30,27 @@ class RadioCanadaIE(InfoExtractor):
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            }
+        },
+        {
+            # empty Title
+            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
+            'info_dict': {
+                'id': '7754998',
+                'ext': 'mp4',
+                'title': 'letelejournal22h',
+                'description': 'INTEGRALE WEB 22H-TJ',
+                'upload_date': '20170720',
+            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }
+    ]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
@@ -59,6 +76,7 @@ class RadioCanadaIE(InfoExtractor):
                device_types.append('android')

        formats = []
+        error = None
        # TODO: extract f4m formats
        # f4m formats can be extracted using flashhd device_type but they produce unplayable file
        for device_type in device_types:
@@ -84,8 +102,8 @@ class RadioCanadaIE(InfoExtractor):
                if not v_url:
                    continue
                if v_url == 'null':
-                    raise ExtractorError('%s said: %s' % (
-                        self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+                    error = xpath_text(v_data, 'message')
+                    continue
                ext = determine_ext(v_url)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
@@ -129,6 +147,9 @@ class RadioCanadaIE(InfoExtractor):
            formats.extend(self._extract_f4m_formats(
                base_url + '/manifest.f4m', video_id,
                f4m_id='hds', fatal=False))
+        if not formats and error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        subtitles = {}
@@ -141,7 +162,7 @@ class RadioCanadaIE(InfoExtractor):
        return {
            'id': video_id,
-            'title': get_meta('Title'),
+            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
@@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor):
                    'format_id': format_id,
                    'height': int_or_none(format_id),
                })
-        else:
+        medias = self._parse_json(
+            self._search_regex(
+                r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
+                'media definitions', default='{}'),
+            video_id, fatal=False)
+        if medias and isinstance(medias, list):
+            for media in medias:
+                format_url = media.get('videoUrl')
+                if not format_url or not isinstance(format_url, compat_str):
+                    continue
+                format_id = media.get('quality')
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'height': int_or_none(format_id),
+                })
+        if not formats:
            video_url = self._html_search_regex(
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
            formats.append({'url': video_url})
@@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor):
            r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
            webpage, 'upload date', fatal=False))
        duration = int_or_none(self._search_regex(
-            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
            webpage, 'view count', fatal=False))

View File

@@ -7,43 +7,84 @@ import itertools

from .common import InfoExtractor
from ..compat import (
    compat_str,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
)
from ..utils import (
    determine_ext,
-    unified_strdate,
+    bool_or_none,
+    int_or_none,
+    try_get,
+    unified_timestamp,
)


-class RutubeIE(InfoExtractor):
+class RutubeBaseIE(InfoExtractor):
+    def _extract_video(self, video, video_id=None, require_title=True):
+        title = video['title'] if require_title else video.get('title')
+
+        age_limit = video.get('is_adult')
+        if age_limit is not None:
+            age_limit = 18 if age_limit is True else 0
+
+        uploader_id = try_get(video, lambda x: x['author']['id'])
+        category = try_get(video, lambda x: x['category']['name'])
+
+        return {
+            'id': video.get('id') or video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': video.get('thumbnail_url'),
+            'duration': int_or_none(video.get('duration')),
+            'uploader': try_get(video, lambda x: x['author']['name']),
+            'uploader_id': compat_str(uploader_id) if uploader_id else None,
+            'timestamp': unified_timestamp(video.get('created_ts')),
+            'category': [category] if category else None,
+            'age_limit': age_limit,
+            'view_count': int_or_none(video.get('hits')),
+            'comment_count': int_or_none(video.get('comments_count')),
+            'is_live': bool_or_none(video.get('is_livestream')),
+        }
+
+
+class RutubeIE(RutubeBaseIE):
    IE_NAME = 'rutube'
    IE_DESC = 'Rutube videos'
    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'

    _TESTS = [{
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        'md5': '79938ade01294ef7e27574890d0d3769',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
-            'ext': 'mp4',
+            'ext': 'flv',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
            'uploader': 'NTDRussian',
            'uploader_id': '29790',
+            'timestamp': 1381943602,
            'upload_date': '20131016',
            'age_limit': 0,
        },
+        'params': {
+            # It requires ffmpeg (m3u8 download)
+            'skip_download': True,
+        },
    }, {
        'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
    }, {
        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
        'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+        'only_matching': True,
+    }, {
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+        'only_matching': True,
    }]

+    @classmethod
+    def suitable(cls, url):
+        return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
+
    @staticmethod
    def _extract_urls(webpage):
        return [mobj.group('url') for mobj in re.finditer(
@@ -52,12 +93,12 @@ class RutubeIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
+
        video = self._download_json(
            'http://rutube.ru/api/video/%s/?format=json' % video_id,
            video_id, 'Downloading video JSON')

-        # Some videos don't have the author field
-        author = video.get('author') or {}
+        info = self._extract_video(video, video_id)

        options = self._download_json(
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
@@ -79,19 +120,8 @@ class RutubeIE(InfoExtractor):
            })
        self._sort_formats(formats)

-        return {
-            'id': video['id'],
-            'title': video['title'],
-            'description': video['description'],
-            'duration': video['duration'],
-            'view_count': video['hits'],
-            'formats': formats,
-            'thumbnail': video['thumbnail_url'],
-            'uploader': author.get('name'),
-            'uploader_id': compat_str(author['id']) if author else None,
-            'upload_date': unified_strdate(video['created_ts']),
-            'age_limit': 18 if video['is_adult'] else 0,
-        }
+        info['formats'] = formats
+        return info


class RutubeEmbedIE(InfoExtractor):
@@ -103,7 +133,8 @@ class RutubeEmbedIE(InfoExtractor):
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
-            'ext': 'mp4',
+            'ext': 'flv',
+            'timestamp': 1387830582,
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
@@ -111,7 +142,7 @@ class RutubeEmbedIE(InfoExtractor):
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
-            'skip_download': 'Requires ffmpeg',
+            'skip_download': True,
        },
    }, {
        'url': 'http://rutube.ru/play/embed/8083783',
@@ -125,10 +156,51 @@ class RutubeEmbedIE(InfoExtractor):
        canonical_url = self._html_search_regex(
            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
            'Canonical URL')
-        return self.url_result(canonical_url, 'Rutube')
+        return self.url_result(canonical_url, RutubeIE.ie_key())


-class RutubeChannelIE(InfoExtractor):
+class RutubePlaylistBaseIE(RutubeBaseIE):
+    def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+        return self._PAGE_TEMPLATE % (playlist_id, page_num)
+
+    def _entries(self, playlist_id, *args, **kwargs):
+        next_page_url = None
+        for pagenum in itertools.count(1):
+            page = self._download_json(
+                next_page_url or self._next_page_url(
+                    pagenum, playlist_id, *args, **kwargs),
+                playlist_id, 'Downloading page %s' % pagenum)
+
+            results = page.get('results')
+            if not results or not isinstance(results, list):
+                break
+
+            for result in results:
+                video_url = result.get('video_url')
+                if not video_url or not isinstance(video_url, compat_str):
+                    continue
+                entry = self._extract_video(result, require_title=False)
+                entry.update({
+                    '_type': 'url',
+                    'url': video_url,
+                    'ie_key': RutubeIE.ie_key(),
+                })
+                yield entry
+
+            next_page_url = page.get('next')
+            if not next_page_url or not page.get('has_next'):
+                break
+
+    def _extract_playlist(self, playlist_id, *args, **kwargs):
+        return self.playlist_result(
+            self._entries(playlist_id, *args, **kwargs),
+            playlist_id, kwargs.get('playlist_name'))
+
+    def _real_extract(self, url):
+        return self._extract_playlist(self._match_id(url))
+
+
+class RutubeChannelIE(RutubePlaylistBaseIE):
    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
@@ -142,27 +214,8 @@ class RutubeChannelIE(InfoExtractor):

    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

-    def _extract_videos(self, channel_id, channel_title=None):
-        entries = []
-        for pagenum in itertools.count(1):
-            page = self._download_json(
-                self._PAGE_TEMPLATE % (channel_id, pagenum),
-                channel_id, 'Downloading page %s' % pagenum)
-            results = page['results']
-            if not results:
-                break
-            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
-            if not page['has_next']:
-                break
-        return self.playlist_result(entries, channel_id, channel_title)
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id = mobj.group('id')
-        return self._extract_videos(channel_id)
-
-
-class RutubeMovieIE(RutubeChannelIE):
+
+class RutubeMovieIE(RutubePlaylistBaseIE):
    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
@@ -176,11 +229,11 @@ class RutubeMovieIE(RutubeChannelIE):
        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
-        movie_name = movie['name']
-        return self._extract_videos(movie_id, movie_name)
+        return self._extract_playlist(
+            movie_id, playlist_name=movie.get('name'))


-class RutubePersonIE(RutubeChannelIE):
+class RutubePersonIE(RutubePlaylistBaseIE):
    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
@@ -193,3 +246,35 @@ class RutubePersonIE(RutubeChannelIE):
    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(RutubePlaylistBaseIE):
+    IE_NAME = 'rutube:playlist'
+    IE_DESC = 'Rutube playlists'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
+        'info_dict': {
+            'id': '3097',
+        },
+        'playlist_count': 27,
+    }, {
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+        'only_matching': True,
+    }]
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
+
+    @staticmethod
+    def suitable(url):
+        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
+
+    def _next_page_url(self, page_num, playlist_id, item_kind):
+        return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
+
+    def _real_extract(self, url):
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        playlist_kind = qs['pl_type'][0]
+        playlist_id = qs['pl_id'][0]
+        return self._extract_playlist(playlist_id, item_kind=playlist_kind)
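The hand-off between `RutubeIE` and the new `RutubePlaylistIE` hinges on the overridden `suitable()`: the playlist extractor only claims a URL when both `pl_type` and `pl_id` query parameters are present, and `RutubeIE` steps aside in exactly that case. A minimal sketch of that behaviour, assuming this version of youtube-dl is importable (URLs taken from the tests above):

```python
from youtube_dl.extractor.rutube import RutubeIE, RutubePlaylistIE

# pl_id alone is not enough for the playlist extractor, so plain video URLs
# that merely carry a pl_id parameter still go through RutubeIE.
video_url = 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252'
# With both pl_type and pl_id present, RutubePlaylistIE takes over and
# RutubeIE.suitable() returns False for the same URL.
playlist_url = 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag'

assert RutubeIE.suitable(video_url) and not RutubePlaylistIE.suitable(video_url)
assert RutubePlaylistIE.suitable(playlist_url) and not RutubeIE.suitable(playlist_url)
```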

View File

@@ -1,8 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals

-import re
import itertools
+import re

from .common import (
    InfoExtractor,
@@ -17,6 +17,7 @@ from ..utils import (
    ExtractorError,
    int_or_none,
    unified_strdate,
+    update_url_query,
)
@@ -120,6 +121,21 @@ class SoundcloudIE(InfoExtractor):
                'license': 'cc-by-sa',
            },
        },
+        # private link, downloadable format
+        {
+            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+            'md5': '64a60b16e617d41d0bef032b7f55441e',
+            'info_dict': {
+                'id': '340344461',
+                'ext': 'wav',
+                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+                'uploader': 'Ori Uplift Music',
+                'upload_date': '20170831',
+                'duration': 7449,
+                'license': 'all-rights-reserved',
+            },
+        },
    ]

    _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
@@ -160,11 +176,13 @@ class SoundcloudIE(InfoExtractor):
            'license': info.get('license'),
        }
        formats = []
+        query = {'client_id': self._CLIENT_ID}
+        if secret_token is not None:
+            query['secret_token'] = secret_token
        if info.get('downloadable', False):
            # We can build a direct link to the song
-            format_url = (
-                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
-                    track_id, self._CLIENT_ID))
+            format_url = update_url_query(
+                'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
            formats.append({
                'format_id': 'download',
                'ext': info.get('original_format', 'mp3'),
@@ -176,10 +194,7 @@ class SoundcloudIE(InfoExtractor):
            # We have to retrieve the url
            format_dict = self._download_json(
                'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
-                track_id, 'Downloading track url', query={
-                    'client_id': self._CLIENT_ID,
-                    'secret_token': secret_token,
-                })
+                track_id, 'Downloading track url', query=query)

            for key, stream_url in format_dict.items():
                abr = int_or_none(self._search_regex(
@@ -216,7 +231,7 @@ class SoundcloudIE(InfoExtractor):
            # cannot be always used, sometimes it can give an HTTP 404 error
            formats.append({
                'format_id': 'fallback',
-                'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+                'url': update_url_query(info['stream_url'], query),
                'ext': ext,
            })

View File

@@ -7,6 +7,7 @@ import hashlib
import json

from .adobepass import AdobePassIE
+from .youtube import YoutubeIE
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -261,11 +262,9 @@ class ViceArticleIE(InfoExtractor):
        if embed_code:
            return _url_res('ooyala:%s' % embed_code, 'Ooyala')

-        youtube_url = self._html_search_regex(
-            r'<iframe[^>]+src="(.*youtube\.com/.*)"',
-            body, 'YouTube URL', default=None)
+        youtube_url = YoutubeIE._extract_url(body)
        if youtube_url:
-            return _url_res(youtube_url, 'Youtube')
+            return _url_res(youtube_url, YoutubeIE.ie_key())

        video_url = self._html_search_regex(
            r'data-video-url="([^"]+)"',

View File

@@ -263,29 +263,35 @@ class VidmeListBaseIE(InfoExtractor):
class VidmeUserIE(VidmeListBaseIE):
    IE_NAME = 'vidme:user'
-    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
+    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
    _API_ITEM = 'list'
    _TITLE = 'Videos'
-    _TEST = {
-        'url': 'https://vid.me/EFARCHIVE',
+    _TESTS = [{
+        'url': 'https://vid.me/MasakoX',
        'info_dict': {
-            'id': '3834632',
-            'title': 'EFARCHIVE - %s' % _TITLE,
+            'id': '16112341',
+            'title': 'MasakoX - %s' % _TITLE,
        },
-        'playlist_mincount': 238,
-    }
+        'playlist_mincount': 191,
+    }, {
+        'url': 'https://vid.me/unsQuare_netWork',
+        'only_matching': True,
+    }]


class VidmeUserLikesIE(VidmeListBaseIE):
    IE_NAME = 'vidme:user:likes'
-    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
+    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
    _API_ITEM = 'likes'
    _TITLE = 'Likes'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://vid.me/ErinAlexis/likes',
        'info_dict': {
            'id': '6483530',
            'title': 'ErinAlexis - %s' % _TITLE,
        },
        'playlist_mincount': 415,
-    }
+    }, {
+        'url': 'https://vid.me/Kaleidoscope-Ish/likes',
+        'only_matching': True,
+    }]

View File

@@ -4,12 +4,14 @@ import re

from .common import InfoExtractor
from ..compat import (
-    compat_urlparse,
+    compat_HTTPError,
    compat_str,
+    compat_urlparse,
)
from ..utils import (
-    parse_duration,
+    ExtractorError,
    js_to_json,
+    parse_duration,
    parse_iso8601,
)
@@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor):

        base_url = self._proto_relative_url(cfg['livepipe'], 'http:')

+        try:
            lecture_data = self._download_json(
                '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
                lecture_id)['lecture'][0]
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                msg = self._parse_json(
+                    e.cause.read().decode('utf-8'), lecture_id)
+                raise ExtractorError(msg['detail'], expected=True)
+            raise

        lecture_info = {
            'id': lecture_id,

View File

@@ -25,6 +25,7 @@ from ..utils import (
from .dailymotion import DailymotionIE
from .pladform import PladformIE
from .vimeo import VimeoIE
+from .youtube import YoutubeIE


class VKBaseIE(InfoExtractor):
@@ -345,11 +346,9 @@ class VKIE(VKBaseIE):
            if re.search(error_re, info_page):
                raise ExtractorError(error_msg % video_id, expected=True)

-        youtube_url = self._search_regex(
-            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
-            info_page, 'youtube iframe', default=None)
+        youtube_url = YoutubeIE._extract_url(info_page)
        if youtube_url:
-            return self.url_result(youtube_url, 'Youtube')
+            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

        vimeo_url = VimeoIE._extract_url(url, info_page)
        if vimeo_url is not None:

View File

@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)

+    @staticmethod
+    def _extract_urls(webpage):
+        # Embedded YouTube player
+        entries = [
+            unescapeHTML(mobj.group('url'))
+            for mobj in re.finditer(r'''(?x)
+            (?:
+                <iframe[^>]+?src=|
+                data-video-url=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*|
+                <object[^>]+data=|
+                new\s+SWFObject\(
+            )
+            (["\'])
+            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+            (?:embed|v|p)/.+?)
+            \1''', webpage)]
+
+        # lazyYT YouTube embed
+        entries.extend(list(map(
+            unescapeHTML,
+            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+        # Wordpress "YouTube Video Importer" plugin
+        matches = re.findall(r'''(?x)<div[^>]+
+            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+        entries.extend(m[-1] for m in matches)
+
+        return entries
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = YoutubeIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
    @classmethod
    def extract_id(cls, url):
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
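These two static helpers are what the abcnews, chilloutzone, cracked, vice, vk and generic changes above now call instead of their own per-site iframe regexes. A small usage sketch, assuming this version of youtube-dl is importable (the embed markup below is made up for illustration):

```python
from youtube_dl.extractor.youtube import YoutubeIE

# A hypothetical page snippet containing a standard YouTube iframe embed.
webpage = '<iframe src="https://www.youtube.com/embed/BaW_jenozKc"></iframe>'

# _extract_urls() returns every embed found; _extract_url() just the first one.
assert YoutubeIE._extract_urls(webpage) == ['https://www.youtube.com/embed/BaW_jenozKc']
assert YoutubeIE._extract_url(webpage) == 'https://www.youtube.com/embed/BaW_jenozKc'
```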

View File

@@ -1815,6 +1815,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
        return default


+def bool_or_none(v, default=None):
+    return v if isinstance(v, bool) else default
+
+
def strip_or_none(v):
    return None if v is None else v.strip()
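`bool_or_none` mirrors the existing `*_or_none` helpers: it passes real booleans through and falls back to the default for anything else, which is how the rutube change above maps `is_livestream` onto `is_live` without misreading truthy non-boolean values. A minimal sketch, assuming this version of youtube-dl is importable:

```python
from youtube_dl.utils import bool_or_none

# Real booleans are returned unchanged.
assert bool_or_none(True) is True
assert bool_or_none(False) is False
# Anything that is not a bool (e.g. a "true" string from JSON) falls back to the default.
assert bool_or_none('true') is None
assert bool_or_none(None, default=False) is False
```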

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals

-__version__ = '2017.09.02'
+__version__ = '2017.09.10'