youtube-dl/youtube_dl/extractor/behindkink.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import url_basename


class BehindKinkIE(InfoExtractor):
    """Extractor for dated post pages on behindkink.com."""

    # Post URLs carry the publication date as /YYYY/MM/DD/ in front of the
    # slug; the slug (up to the first '/', '#', '?' or '_') is the display id.
    # Both http and https links are accepted.
    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
    _TEST = {
        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
        'info_dict': {
            'id': '37127',
            'ext': 'mp4',
            'title': 'What are you passionate about – Marley Blaze',
            'description': 'Getting a better understanding of the talent that comes through the doors of the Armory is one of our missions at Behind Kink. Asking the question what are you passionate about helps us get a littl...',
            'upload_date': '20141205',
            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the post at *url*.

        The upload date is taken from the date components of the URL, the
        media URL from the page's mp4 ``<source>`` tag, and the video id from
        the part of the media file's basename before the first underscore.
        Title, thumbnail and description come from the page's Open Graph
        helpers.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        # The URL's date path doubles as the upload date in YYYYMMDD form.
        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')

        webpage = self._download_webpage(url, display_id)

        # [^"]+ keeps the capture inside a single src attribute, so a
        # preceding non-mp4 <source> tag cannot be swallowed into the URL.
        # Whitespace between attributes and the tag ending are not pinned down.
        video_url = self._search_regex(
            r'<source\s+src="([^"]+)"\s+type="video/mp4"', webpage, 'video URL')

        # Basename up to the first underscore serves as the video id.
        video_id = url_basename(video_url).split('_')[0]

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': self._og_search_title(webpage),
            'display_id': display_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'upload_date': upload_date,
            'age_limit': 18,
        }
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
+								# coding: utf-8
 								from __future__ import unicode_literals
 								import re
 								from .common import InfoExtractor
 								from ..utils import url_basename
 								class BehindKinkIE(InfoExtractor):
-												[BehindKink] Minor fixes
 - fix _VALID_URL regex
 - remove unnecessary variable
 - remove second call of report_extraction

											
										
										
											2014-09-15 06:09:17 -07:00
+								    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
+								    _TEST = {
-												[BehindKink] Replace test

Old one is not accessible anymore

											
										
										
											2014-12-11 05:26:59 -08:00
+								        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
 								        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
+								        'info_dict': {
-												[BehindKink] Replace test

Old one is not accessible anymore

											
										
										
											2014-12-11 05:26:59 -08:00
+								            'id': '37127',
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
+								            'ext': 'mp4',
-												[BehindKink] Replace test

Old one is not accessible anymore

											
										
										
											2014-12-11 05:26:59 -08:00
+								            'title': 'What are you passionate about – Marley Blaze',
 								            'description': 'Getting a better understanding of the talent that comes through the doors of the Armory is one of our missions at Behind Kink. Asking the question what are you passionate about helps us get a littl...',
 								            'upload_date': '20141205',
 								            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
+								            'age_limit': 18,
 								        }
 								    }
 								    def _real_extract(self, url):
 								        mobj = re.match(self._VALID_URL, url)
 								        display_id = mobj.group('id')
 								        year = mobj.group('year')
 								        month = mobj.group('month')
 								        day = mobj.group('day')
 								        upload_date = year + month + day
-												[BehindKink] Minor fixes
 - fix _VALID_URL regex
 - remove unnecessary variable
 - remove second call of report_extraction

											
										
										
											2014-09-15 06:09:17 -07:00
+								        webpage = self._download_webpage(url, display_id)
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
 								        video_url = self._search_regex(
-												[BehindKink] Update  URL extraction

											
										
										
											2014-12-11 05:25:26 -08:00
+								            r'<source src="(.*?)" type="video/mp4" />', webpage, 'video URL')
-												[BehindKink] Add new extractor

											
										
										
											2014-09-13 08:47:19 -07:00
 								        video_id = url_basename(video_url)
 								        video_id = video_id.split('_')[0]
 								        return {
 								            'id': video_id,
 								            'url': video_url,
 								            'ext': 'mp4',
 								            'title': self._og_search_title(webpage),
 								            'display_id': display_id,
 								            'thumbnail': self._og_search_thumbnail(webpage),
 								            'description': self._og_search_description(webpage),
 								            'upload_date': upload_date,
 								            'age_limit': 18,
 								        }