# youtube-dl/youtube_dl/extractor/myspace.py

from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..compat import (
    compat_str,
)
from ..utils import ExtractorError


class MySpaceIE(InfoExtractor):
    """Information extractor for myspace.com video and song pages.

    Video pages are read from the JSON ``context`` object embedded in the
    page; song pages are read from the ``data-*`` attributes of the song's
    ``<button>`` element. Songs that carry no stream URL but reference a
    Vevo or YouTube id are handed over to those extractors.
    """

    _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
            'info_dict': {
                'id': '100008689',
                'ext': 'flv',
                'title': 'Viva La Vida',
                'description': 'The official Viva La Vida video, directed by Hype Williams',
                'uploader': 'Coldplay',
                'uploader_id': 'coldplay',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        # songs
        {
            'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
            'md5': 'f1d7323321f6b7775bf1e3754c1707dc',
            'info_dict': {
                'id': '93388656',
                'ext': 'flv',
                'playlist': 'The Demo',
                'title': 'Of weakened soul...',
                'uploader': 'Killsorrow',
                'uploader_id': 'killsorrow',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        }, {
            'add_ie': ['Vevo'],
            'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
            'info_dict': {
                # No u'' prefixes: unicode_literals is in effect for this
                # module, and the prefix is a syntax error on Python 3.0-3.2.
                'id': 'USZM20600099',
                'title': 'Animal I Have Become',
                'uploader': 'Three Days Grace',
                'timestamp': int,
            },
            'skip': 'VEVO is only available in some countries',
        }, {
            'add_ie': ['Youtube'],
            'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
            'info_dict': {
                'id': 'ypWvQgnJrSU',
                'title': 'Starset - First Light',
                'uploader': 'Jacob Soren',
                'uploader_id': 'SorenPromotions',
                'upload_date': '20140725',
            }
        },
    ]

    def _real_extract(self, url):
        """Extract the info dict for a MySpace video or song URL.

        Returns one of:
          * an info dict whose 'url'/'play_path' come from the page's
            stream URL;
          * the result of ``self.url_result`` for the Vevo or Youtube
            extractor, when the song has no stream URL but carries a
            ``data-vevo-id`` or ``data-youtube-id``;
          * None, when a song page contains no button for the requested
            song id (a message is printed via ``to_screen``).

        Raises ExtractorError when a song has neither a stream URL nor a
        Vevo/YouTube id, or when the stream URL does not have the
        ``<url>;<play path>`` form.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'playerSwf":"([^"?]*)', webpage, 'player URL')

        if mobj.group('mediatype').startswith('music/song'):
            # songs don't store any useful info in the 'context' variable
            song_data = self._search_regex(
                r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
                webpage, 'song_data', default=None, group=0)
            if song_data is None:
                # The page exists but offers no button for this song id;
                # report it and return None instead of failing.
                self.to_screen(
                    '%s: No downloadable song on this page' % video_id)
                return

            def search_data(name):
                # Value of the data-<name> attribute inside the matched
                # button markup, or '' when the attribute is absent.
                return self._search_regex(
                    r'''data-%s=([\'"])(.*?)\1''' % name,
                    song_data, name, default='', group=2)

            stream_url = search_data('stream-url')
            if not stream_url:
                # No stream of its own: delegate to the service the button
                # points at, if any.
                vevo_id = search_data('vevo-id')
                youtube_id = search_data('youtube-id')
                if vevo_id:
                    self.to_screen('Vevo video detected: %s' % vevo_id)
                    return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
                elif youtube_id:
                    self.to_screen('Youtube video detected: %s' % youtube_id)
                    return self.url_result(youtube_id, ie='Youtube')
                else:
                    raise ExtractorError(
                        'Found song but don\'t know how to download it')
            info = {
                'id': video_id,
                'title': self._og_search_title(webpage),
                'uploader': search_data('artist-name'),
                'uploader_id': search_data('artist-username'),
                'playlist': search_data('album-title'),
                'thumbnail': self._og_search_thumbnail(webpage),
            }
        else:
            context = json.loads(self._search_regex(
                r'context = ({.*?});', webpage, 'context'))
            video = context['video']
            stream_url = video['streamUrl']
            info = {
                'id': compat_str(video['mediaId']),
                'title': video['title'],
                'description': video['description'],
                'thumbnail': video['imageUrl'],
                'uploader': video['artistName'],
                'uploader_id': video['artistUsername'],
            }

        # The stream URL packs the connection URL and the play path into a
        # single string separated by the first ';'. Fail with a readable
        # extractor error rather than a bare unpacking ValueError when the
        # separator is missing.
        rtmp_url, separator, play_path = stream_url.partition(';')
        if not separator:
            raise ExtractorError(
                'Unexpected stream URL format: %s' % stream_url)
        info.update({
            'url': rtmp_url,
            'play_path': play_path,
            'player_url': player_url,
            'ext': 'flv',
        })
        return info
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`from __future__ import unicode_literals`

Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`import re`
			`import json`

			`from .common import InfoExtractor`
[myspace] pep8 and modernization 2014-11-23 13:12:18 -08:00			`from ..compat import (`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`compat_str,`
			`)`
[myspace] Redirect to other extractors There are many songs just linked from Vevo/YouTube to MySpace. Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041 YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426 2014-11-30 11:00:16 -08:00			`from ..utils import ExtractorError`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00

			`class MySpaceIE(InfoExtractor):`
[myspace] More robust mediatype check 2014-01-19 17:44:08 -08:00			`_VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00
			`_TESTS = [`
			`{`
			`'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',`
			`'info_dict': {`
			`'id': '100008689',`
			`'ext': 'flv',`
			`'title': 'Viva La Vida',`
			`'description': 'The official Viva La Vida video, directed by Hype Williams',`
			`'uploader': 'Coldplay',`
			`'uploader_id': 'coldplay',`
			`},`
			`'params': {`
			`# rtmp download`
			`'skip_download': True,`
			`},`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`},`
[myspace] Update tests 2014-11-30 10:57:35 -08:00			`# songs`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`{`
[myspace] Update tests 2014-11-30 10:57:35 -08:00			`'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',`
			`'md5': 'f1d7323321f6b7775bf1e3754c1707dc',`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`'info_dict': {`
[myspace] Update tests 2014-11-30 10:57:35 -08:00			`'id': '93388656',`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`'ext': 'flv',`
[myspace] Update tests 2014-11-30 10:57:35 -08:00			`'playlist': 'The Demo',`
			`'title': 'Of weakened soul...',`
			`'uploader': 'Killsorrow',`
			`'uploader_id': 'killsorrow',`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`},`
			`'params': {`
			`# rtmp download`
			`'skip_download': True,`
			`},`
[myspace] Update tests 2014-11-30 10:57:35 -08:00			`}, {`
			`'add_ie': ['Vevo'],`
			`'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',`
			`'info_dict': {`
			`'id': u'USZM20600099',`
			`'title': u'Animal I Have Become',`
			`'uploader': u'Three Days Grace',`
			`'timestamp': int,`
			`},`
			`'skip': 'VEVO is only available in some countries',`
			`}, {`
			`'add_ie': ['Youtube'],`
			`'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',`
			`'info_dict': {`
			`'id': 'ypWvQgnJrSU',`
			`'title': 'Starset - First Light',`
			`'uploader': 'Jacob Soren',`
			`'uploader_id': 'SorenPromotions',`
			`'upload_date': '20140725',`
			`}`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`},`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`]`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`
			`webpage = self._download_webpage(url, video_id)`
[myspace] Use player_url for faster download It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me. 2014-11-30 10:06:23 -08:00			`player_url = self._search_regex(`
			`r'playerSwf":"([^"?]*)', webpage, 'player URL')`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00
[myspace] More robust mediatype check 2014-01-19 17:44:08 -08:00			`if mobj.group('mediatype').startswith('music/song'):`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`# songs don't store any useful info in the 'context' variable`
[myspace] Handle non-playable songs I'm adding this because sometimes there is a song page, but you cannot play it. Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439 It will be useful for downloading whole album with songs like this. 2014-11-30 10:36:24 -08:00			`song_data = self._search_regex(`
			`r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,`
			`webpage, 'song_data', default=None, group=0)`
			`if song_data is None:`
			`self.to_screen(`
			`'%s: No downloadable song on this page' % video_id)`
			`return`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`def search_data(name):`
[myspace] pep8 and modernization 2014-11-23 13:12:18 -08:00			`return self._search_regex(`
[myspace] Handle non-playable songs I'm adding this because sometimes there is a song page, but you cannot play it. Example: https://myspace.com/starset2/music/song/let-it-die-maniac-agenda-remix-bonus-track-95799916-106964439 It will be useful for downloading whole album with songs like this. 2014-11-30 10:36:24 -08:00			`r'''data-%s=([\'"])(.*?)\1''' % name,`
			`song_data, name, default='', group=2)`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`streamUrl = search_data('stream-url')`
[myspace] Redirect to other extractors There are many songs just linked from Vevo/YouTube to MySpace. Vevo example: https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041 YouTube example: https://myspace.com/starset2/music/song/first-light-95799905-106964426 2014-11-30 11:00:16 -08:00			`if not streamUrl:`
			`vevo_id = search_data('vevo-id')`
			`youtube_id = search_data('youtube-id')`
			`if vevo_id:`
			`self.to_screen('Vevo video detected: %s' % vevo_id)`
			`return self.url_result('vevo:%s' % vevo_id, ie='Vevo')`
			`elif youtube_id:`
			`self.to_screen('Youtube video detected: %s' % youtube_id)`
			`return self.url_result(youtube_id, ie='Youtube')`
			`else:`
			`raise ExtractorError(`
			`'Found song but don\'t know how to download it')`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`info = {`
			`'id': video_id,`
			`'title': self._og_search_title(webpage),`
[myspace] Add more data to info dict `uploader` is an artist `playlist` is an album 2014-11-30 10:07:36 -08:00			`'uploader': search_data('artist-name'),`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`'uploader_id': search_data('artist-username'),`
[myspace] Add more data to info dict `uploader` is an artist `playlist` is an album 2014-11-30 10:07:36 -08:00			`'playlist': search_data('album-title'),`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`'thumbnail': self._og_search_thumbnail(webpage),`
			`}`
			`else:`
[myspace] pep8 and modernization 2014-11-23 13:12:18 -08:00			`context = json.loads(self._search_regex(`
			`r'context = ({.*?});', webpage, 'context'))`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`video = context['video']`
			`streamUrl = video['streamUrl']`
			`info = {`
			`'id': compat_str(video['mediaId']),`
			`'title': video['title'],`
			`'description': video['description'],`
			`'thumbnail': video['imageUrl'],`
			`'uploader': video['artistName'],`
			`'uploader_id': video['artistUsername'],`
			`}`

			`rtmp_url, play_path = streamUrl.split(';', 1)`
			`info.update({`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`'url': rtmp_url,`
			`'play_path': play_path,`
[myspace] Use player_url for faster download It keeps reconnecting without it. Download time decreased from 7+ minutes to 25 seconds for me. 2014-11-30 10:06:23 -08:00			`'player_url': player_url,`
Add an extractor for MySpace (closes #1666) 2013-10-28 14:02:17 -07:00			`'ext': 'flv',`
[myspace] Add support for song urls (fixes #2040) 2014-01-19 02:38:48 -08:00			`})`
			`return info`