Compare commits: 2014.03.17 ... 2014.03.24 (104 commits)

02e4482e22  b8a792de80  fac55558ad  b2799ff96d  7a249480b4  f605128d13
ba40a74666  fb8ae2d438  893f8832b5  878d11ec29  515bbe4b5b  75f2e25ba9
0d466d34a3  6949d81095  f847ca02d3  510243ba58  b540697a8a  0d3641e589
72546c831e  d26db9269d  4c0941853a  c11726364e  c577d735c6  9f0375f61a
5e114e4bfe  83622b6d2f  3d87426c2d  ce328530a9  f70daac108  912b38b428
6e25c58ed7  51fb2e98d2  38d63d846e  07cec9776e  ea38e55fff  257cfebfe6
6eefe53329  1986025d2b  c9aa111b4f  bfcb6e3917  2c1396073e  401983c6a0
391dc3ee07  be3b8fa30f  9f5809b3e8  0320ddc192  56dd55721c  231f76b530
55442a7812  43b81eb98a  bfd718793c  a9c2896e22  278229d195  fa154d1dbe
7e2ede9891  74af99fc2f  0f2a2ba14b  e24b5a8610  750f9020ae  f82863851e
933a5b3792  aa488e1385  d77650525d  3e50c29984  64e7ad6045  23f4a93bb4
6f13b055f1  1f91bd15c3  11a15be4ce  14e17e18cb  1b124d1942  747373d4ae
18d367c0a5  a1a530b067  cb9722cb3f  773c0b4bb8  23c322a531  7e8c0af004
d2983ccb25  f24e9833dc  bc2bdf5709  627a209f74  1a4895453a  aab74fa106
2bd9efd4c2  39a743fb9b  4966a0b22d  fc26023120  8d7c0cca13  f66ede4328
cc88b90ec8  b6c5fa9a0b  dff10eaa77  4e6f9aeca1  e68301af21  17286a96f2
0892363e6d  f102372b5f  ecbe1ad207  410afb2003  685052fc7b  d95e35d659
1439073049  1f7659dbe9

Files changed in this range:
MANIFEST.in:

```diff
@@ -3,3 +3,5 @@ include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
 include youtube-dl.1
+recursive-include docs *
+prune docs/_build
```
README.md:

```diff
@@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like.
     --user-agent UA            specify a custom user agent
     --referer REF              specify a custom referer, use if the video
                                access is restricted to one domain
+    --add-header FIELD:VALUE   specify a custom HTTP header and its value,
+                               separated by a colon ':'. You can use this
+                               option multiple times
     --list-extractors          List all supported extractors and the URLs
                                they would handle
     --extractor-descriptions   Output descriptions of all supported
@@ -36,6 +39,9 @@ which means you can modify it, redistribute it or use it however you like.
                                an empty string (--proxy "") for direct
                                connection
     --no-check-certificate     Suppress HTTPS certificate validation.
+    --prefer-insecure          Use an unencrypted connection to retrieve
+                               information about the video. (Currently
+                               supported only for YouTube)
     --cache-dir DIR            Location in the filesystem where youtube-dl
                                can store some downloaded information
                                permanently. By default $XDG_CACHE_HOME
```
devscripts/release.sh:

```diff
@@ -22,6 +22,12 @@ fi
 
 if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
 version="$1"
+major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
+if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
+    echo "$version does not start with today's date!"
+    exit 1
+fi
+
 if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
 if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
 useless_files=$(find youtube_dl -type f -not -name '*.py')
@@ -70,7 +76,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 git checkout HEAD -- youtube-dl youtube-dl.exe
 
 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
-for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
```
docs/.gitignore (new file, 1 line):

```diff
@@ -0,0 +1 @@
+_build/
```
docs/Makefile (new file, 177 lines):

```make
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
```
docs/conf.py (new file, 71 lines):

```python
# -*- coding: utf-8 -*-
#
# youtube-dl documentation build configuration file, created by
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys
import os
# Allows to import youtube_dl
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# -- General configuration ------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'youtube-dl'
copyright = u'2014, Ricardo Garcia Gonzalez'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
import youtube_dl
version = youtube_dl.__version__
# The full version, including alpha/beta/rc tags.
release = version

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'default'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Output file base name for HTML help builder.
htmlhelp_basename = 'youtube-dldoc'
```
docs/index.rst (new file, 23 lines):

```rst
Welcome to youtube-dl's documentation!
======================================

*youtube-dl* is a command-line program to download videos from YouTube.com and more sites.
It can also be used in Python code.

Developer guide
---------------

This section contains information for using *youtube-dl* from Python programs.

.. toctree::
    :maxdepth: 2

    module_guide

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
```
docs/module_guide.rst (new file, 67 lines):

```rst
Using the ``youtube_dl`` module
===============================

When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:

.. code-block:: python

    >>> from youtube_dl import YoutubeDL
    >>> ydl = YoutubeDL()
    >>> ydl.add_default_info_extractors()

Extracting video information
----------------------------

You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary:

.. code-block:: python

    >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    [youtube] Setting language
    [youtube] BaW_jenozKc: Downloading webpage
    [youtube] BaW_jenozKc: Downloading video info webpage
    [youtube] BaW_jenozKc: Extracting video information
    >>> info['title']
    'youtube-dl test video "\'/\\ä↭𝕐'
    >>> info['height'], info['width']
    (720, 1280)

If you want to download or play the video you can get its url:

.. code-block:: python

    >>> info['url']
    'https://...'

Extracting playlist information
-------------------------------

The playlist information is extracted in a similar way, but the dictionary is a bit different:

.. code-block:: python

    >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
    [TED] open_source_open_world: Downloading playlist webpage
    ...
    >>> playlist['title']
    'Open-source, open world'

You can access the videos in the playlist with the ``entries`` field:

.. code-block:: python

    >>> for video in playlist['entries']:
    ...     print('Video #%d: %s' % (video['playlist_index'], video['title']))

    Video #1: How Arduino is open-sourcing imagination
    Video #2: The year open data went worldwide
    Video #3: Massive-scale online collaboration
    Video #4: The art of asking
    Video #5: How cognitive surplus will change the world
    Video #6: The birth of Wikipedia
    Video #7: Coding a better government
    Video #8: The era of open innovation
    Video #9: The currency of the new economy is trust
```
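Read end to end, the new module guide amounts to the following minimal sketch (same URLs and fields as in the guide's examples; network access is required to run it):

```python
from youtube_dl import YoutubeDL

# Create the downloader and register all bundled extractors,
# exactly as the module guide above describes.
ydl = YoutubeDL()
ydl.add_default_info_extractors()

# Extract metadata without downloading the media itself.
info = ydl.extract_info(
    'http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
print(info['title'], info['width'], info['height'])

# Playlists yield an 'entries' list of per-video dictionaries.
playlist = ydl.extract_info(
    'http://www.ted.com/playlists/13/open_source_open_world', download=False)
for video in playlist['entries']:
    print('Video #%d: %s' % (video['playlist_index'], video['title']))
```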
test/helper.py:

```diff
@@ -9,7 +9,10 @@ import sys
 
 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import preferredencoding
+from youtube_dl.utils import (
+    compat_str,
+    preferredencoding,
+)
 
 
 def get_params(override=None):
@@ -83,3 +86,45 @@ def gettestcases():
 
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def expect_info_dict(self, expected_dict, got_dict):
+    for info_field, expected in expected_dict.items():
+        if isinstance(expected, compat_str) and expected.startswith('re:'):
+            got = got_dict.get(info_field)
+            match_str = expected[len('re:'):]
+            match_rex = re.compile(match_str)
+
+            self.assertTrue(
+                isinstance(got, compat_str) and match_rex.match(got),
+                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+        elif isinstance(expected, type):
+            got = got_dict.get(info_field)
+            self.assertTrue(isinstance(got, expected),
+                u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
+        else:
+            if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                got = 'md5:' + md5(got_dict.get(info_field))
+            else:
+                got = got_dict.get(info_field)
+            self.assertEqual(expected, got,
+                u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+
+    # Check for the presence of mandatory fields
+    for key in ('id', 'url', 'title', 'ext'):
+        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    for key in ['webpage_url', 'extractor', 'extractor_key']:
+        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+    # Are checkable fields missing from the test case definition?
+    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+        for key, value in got_dict.items()
+        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
```
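The new helper centralizes the comparison conventions test cases rely on. For illustration only, a hypothetical `info_dict` exercising the three modes it supports (all field values here are invented):

```python
# Hypothetical test-case definition; values are made up for illustration.
expected = {
    'id': 'abc123',                        # plain value: compared with assertEqual
    'thumbnail': 're:^https?://.*\.jpg$',  # 're:' prefix: regex match against the value
    'description': 'md5:0123456789abcdef0123456789abcdef',  # 'md5:' prefix: hash of the value
    'duration': int,                       # a bare type: isinstance() check
}
# expect_info_dict(self, expected, got_dict) would then apply each rule in turn
# and additionally verify the mandatory 'id', 'url', 'title' and 'ext' fields.
```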
test/test_all_urls.py:

```diff
@@ -141,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_pbs(self):
         # https://github.com/rg3/youtube-dl/issues/2350
         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
+        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
 
 if __name__ == '__main__':
     unittest.main()
```
test/test_download.py:

```diff
@@ -9,16 +9,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import (
     get_params,
     gettestcases,
-    try_rm,
+    expect_info_dict,
     md5,
-    report_warning
+    try_rm,
+    report_warning,
 )
 
 
 import hashlib
 import io
 import json
-import re
 import socket
 
 import youtube_dl.YoutubeDL
@@ -135,40 +135,8 @@ def generator(test_case):
             self.assertEqual(md5_for_file, tc['md5'])
         with io.open(info_json_fn, encoding='utf-8') as infof:
             info_dict = json.load(infof)
-        for (info_field, expected) in tc.get('info_dict', {}).items():
-            if isinstance(expected, compat_str) and expected.startswith('re:'):
-                got = info_dict.get(info_field)
-                match_str = expected[len('re:'):]
-                match_rex = re.compile(match_str)
-
-                self.assertTrue(
-                    isinstance(got, compat_str) and match_rex.match(got),
-                    u'field %s (value: %r) should match %r' % (info_field, got, match_str))
-            elif isinstance(expected, type):
-                got = info_dict.get(info_field)
-                self.assertTrue(isinstance(got, expected),
-                    u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
-            else:
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
-                    got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
-
-        # Check for the presence of mandatory fields
-        for key in ('id', 'url', 'title', 'ext'):
-            self.assertTrue(key in info_dict.keys() and info_dict[key])
-        # Check for mandatory fields that are automatically set by YoutubeDL
-        for key in ['webpage_url', 'extractor', 'extractor_key']:
-            self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-        # If checkable fields are missing from the test case, print the info_dict
-        test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-            for key, value in info_dict.items()
-            if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-        if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
-            sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        expect_info_dict(self, tc.get('info_dict', {}), info_dict)
     finally:
         try_rm_tcs_files()
```
test/test_playlists.py:

```diff
@@ -9,8 +9,10 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL
+from test.helper import (
+    expect_info_dict,
+    FakeYDL,
+)
 
 from youtube_dl.extractor import (
     AcademicEarthCourseIE,
@@ -37,6 +39,9 @@ from youtube_dl.extractor import (
     GoogleSearchIE,
     GenericIE,
     TEDIE,
+    ToypicsUserIE,
+    XTubeUserIE,
+    InstagramUserIE,
 )
 
 
@@ -269,5 +274,46 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'Who are the hackers?')
         self.assertTrue(len(result['entries']) >= 6)
 
+    def test_toypics_user(self):
+        dl = FakeYDL()
+        ie = ToypicsUserIE(dl)
+        result = ie.extract('http://videos.toypics.net/Mikey')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'Mikey')
+        self.assertTrue(len(result['entries']) >= 17)
+
+    def test_xtube_user(self):
+        dl = FakeYDL()
+        ie = XTubeUserIE(dl)
+        result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'greenshowers')
+        self.assertTrue(len(result['entries']) >= 155)
+
+    def test_InstagramUser(self):
+        dl = FakeYDL()
+        ie = InstagramUserIE(dl)
+        result = ie.extract('http://instagram.com/porsche')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'porsche')
+        self.assertTrue(len(result['entries']) >= 2)
+        test_video = next(
+            e for e in result['entries']
+            if e['id'] == '614605558512799803_462752227')
+        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+        dl.process_video_result(test_video, download=False)
+        EXPECTED = {
+            'id': '614605558512799803_462752227',
+            'ext': 'mp4',
+            'title': '#Porsche Intelligent Performance.',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'uploader': 'Porsche',
+            'uploader_id': 'porsche',
+            'timestamp': 1387486713,
+            'upload_date': '20131219',
+        }
+        expect_info_dict(self, EXPECTED, test_video)
+
 
 if __name__ == '__main__':
     unittest.main()
```
test/test_utils.py:

```diff
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 # Various small unit tests
 import io
+import json
 import xml.etree.ElementTree
 
 #from youtube_dl.utils import htmlentity_transform
@@ -35,6 +36,8 @@ from youtube_dl.utils import (
     url_basename,
     urlencode_postdata,
     xpath_with_ns,
+    parse_iso8601,
+    strip_jsonp,
 )
 
 if sys.version_info < (3, 0):
@@ -266,5 +269,16 @@ class TestUtil(unittest.TestCase):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
         self.assertTrue(isinstance(data, bytes))
 
+    def test_parse_iso8601(self):
+        self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+        self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
+        self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
+
+    def test_strip_jsonp(self):
+        stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
+        d = json.loads(stripped)
+        self.assertEqual(d, [{"id": "532cb", "x": 3}])
+
 
 if __name__ == '__main__':
     unittest.main()
```
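The new tests pin the expected conversion down exactly: all three timestamp spellings denote the same instant. A standard-library sketch reproducing the tested behavior (an illustration only, not necessarily the `youtube_dl.utils.parse_iso8601` implementation):

```python
import re
from datetime import datetime, timedelta, timezone

def parse_iso8601_sketch(date_str):
    # Accepts '2014-03-23T23:04:26+0100', '...+0000' or '...Z' and
    # returns a Unix timestamp (seconds since the epoch, in UTC).
    m = re.match(
        r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(Z|[+-]\d{4})$',
        date_str)
    if m is None:
        return None
    y, mo, d, h, mi, s = (int(g) for g in m.groups()[:6])
    tz = m.group(7)
    if tz == 'Z':
        offset = timedelta(0)
    else:
        sign = 1 if tz[0] == '+' else -1
        offset = sign * timedelta(hours=int(tz[1:3]), minutes=int(tz[3:5]))
    dt = datetime(y, mo, d, h, mi, s, tzinfo=timezone(offset))
    return int(dt.timestamp())

assert parse_iso8601_sketch('2014-03-23T23:04:26+0100') == 1395612266
assert parse_iso8601_sketch('2014-03-23T22:04:26Z') == 1395612266
```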
youtube_dl/InfoExtractors.py (deleted):

```diff
@@ -1,4 +0,0 @@
-# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
-
-from .extractor.common import InfoExtractor, SearchInfoExtractor
-from .extractor import gen_extractors, get_info_extractor
```
youtube_dl/YoutubeDL.py:

```diff
@@ -148,6 +148,8 @@ class YoutubeDL(object):
                        again.
     cookiefile:        File name where cookies should be read from and dumped to.
     nocheckcertificate:Do not verify SSL certificates
+    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
+                       At the moment, this is only supported by YouTube.
     proxy:             URL of the proxy server to use
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
```
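As an illustration of the options documented in that docstring, a hypothetical configuration (values invented; `prefer_insecure` is honored only by YouTube at this point):

```python
from youtube_dl import YoutubeDL

# Hypothetical option dict using fields from the docstring above.
ydl = YoutubeDL({
    'nocheckcertificate': True,          # skip SSL certificate verification
    'prefer_insecure': True,             # fetch video info over plain HTTP (YouTube only)
    'proxy': 'http://127.0.0.1:8080/',   # hypothetical proxy URL
    'socket_timeout': 20,                # seconds to wait for unresponsive hosts
})
```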
youtube_dl/YoutubeDL.py (continued):

```diff
@@ -510,13 +512,7 @@ class YoutubeDL(object):
                     '_type': 'compat_list',
                     'entries': ie_result,
                 }
-            self.add_extra_info(ie_result,
-                {
-                    'extractor': ie.IE_NAME,
-                    'webpage_url': url,
-                    'webpage_url_basename': url_basename(url),
-                    'extractor_key': ie.ie_key(),
-                })
+            self.add_default_extra_info(ie_result, ie, url)
             if process:
                 return self.process_ie_result(ie_result, download, extra_info)
             else:
@@ -533,7 +529,15 @@ class YoutubeDL(object):
             else:
                 raise
         else:
-            self.report_error('no suitable InfoExtractor: %s' % url)
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+    def add_default_extra_info(self, ie_result, ie, url):
+        self.add_extra_info(ie_result, {
+            'extractor': ie.IE_NAME,
+            'webpage_url': url,
+            'webpage_url_basename': url_basename(url),
+            'extractor_key': ie.ie_key(),
+        })
 
     def process_ie_result(self, ie_result, download=True, extra_info={}):
         """
```
youtube_dl/__init__.py:

```diff
@@ -56,7 +56,6 @@ __authors__ = (
 __license__ = 'Public Domain'
 
 import codecs
-import getpass
 import io
 import locale
 import optparse
@@ -68,6 +67,7 @@ import sys
 
 
 from .utils import (
+    compat_getpass,
     compat_print,
     DateRange,
     decodeOption,
@@ -227,6 +227,9 @@ def parseOpts(overrideArguments=None):
     general.add_option('--referer',
         dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
         metavar='REF', default=None)
+    general.add_option('--add-header',
+        dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
+        metavar='FIELD:VALUE')
     general.add_option('--list-extractors',
         action='store_true', dest='list_extractors',
         help='List all supported extractors and the URLs they would handle', default=False)
@@ -237,6 +240,9 @@ def parseOpts(overrideArguments=None):
         '--proxy', dest='proxy', default=None, metavar='URL',
         help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+    general.add_option(
+        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
     general.add_option(
         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
         help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +263,6 @@ def parseOpts(overrideArguments=None):
         action='store_true',
         help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
 
-
     selection.add_option(
         '--playlist-start',
         dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -554,6 +559,16 @@ def _real_main(argv=None):
     if opts.referer is not None:
         std_headers['Referer'] = opts.referer
 
+    # Custom HTTP headers
+    if opts.headers is not None:
+        for h in opts.headers:
+            if h.find(':', 1) < 0:
+                parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
+            key, value = h.split(':', 2)
+            if opts.verbose:
+                write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
+            std_headers[key] = value
+
     # Dump user agent
     if opts.dump_user_agent:
         compat_print(std_headers['User-Agent'])
@@ -611,7 +626,7 @@ def _real_main(argv=None):
     if opts.usetitle and opts.useid:
         parser.error(u'using title conflicts with using video ID')
     if opts.username is not None and opts.password is None:
-        opts.password = getpass.getpass(u'Type account password and press return:')
+        opts.password = compat_getpass(u'Type account password and press [Return]: ')
     if opts.ratelimit is not None:
         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
         if numeric_limit is None:
@@ -756,6 +771,7 @@ def _real_main(argv=None):
         'download_archive': download_archive_fn,
         'cookiefile': opts.cookiefile,
         'nocheckcertificate': opts.no_check_certificate,
+        'prefer_insecure': opts.prefer_insecure,
         'proxy': opts.proxy,
         'socket_timeout': opts.socket_timeout,
         'bidi_workaround': opts.bidi_workaround,
```
youtube_dl/downloader/hls.py:

```diff
@@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
-            '-bsf:a', 'aac_adtstoasc', tmpfilename]
+        args = [
+            '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc',
+            encodeFilename(tmpfilename, for_subprocess=True)]
 
         for program in ['avconv', 'ffmpeg']:
             try:
```
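For a concrete sense of what the restructured argument list produces, the spawned command ends up of roughly this shape (program choice and file names below are placeholders, not values from the diff):

```python
# Roughly what HlsFD assembles; 'video.mp4.part' stands in for tmpfilename.
program = 'ffmpeg'  # or 'avconv', whichever is found first
args = [
    '-y', '-i', 'https://example.com/playlist.m3u8',  # hypothetical HLS URL
    '-f', 'mp4', '-c', 'copy',        # remux into MP4 without re-encoding
    '-bsf:a', 'aac_adtstoasc',        # convert ADTS AAC to MP4-style AAC
    'video.mp4.part',
]
cmd = [program] + args  # the full command line handed to the subprocess
```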
youtube_dl/extractor/__init__.py:

```diff
@@ -2,6 +2,7 @@ from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
+from .aol import AolIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
@@ -10,8 +11,10 @@ from .arte import (
     ArteTvIE,
     ArteTVPlus7IE,
     ArteTVCreativeIE,
+    ArteTVConcertIE,
     ArteTVFutureIE,
     ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
@@ -23,6 +26,7 @@ from .bloomberg import BloombergIE
 from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .byutv import BYUtvIE
 from .c56 import C56IE
 from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
@@ -63,6 +67,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
+from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -71,6 +76,7 @@ from .facebook import FacebookIE
 from .faz import FazIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
 from .fktv import (
     FKTVIE,
     FKTVPosteckeIE,
@@ -108,7 +114,7 @@ from .imdb import (
 )
 from .ina import InaIE
 from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
@@ -173,6 +179,7 @@ from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
+from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
@@ -190,6 +197,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
+from .rts import RTSIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
@@ -234,6 +242,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trutube import TruTubeIE
@@ -258,6 +267,7 @@ from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
+from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
 from .vimeo import (
@@ -272,16 +282,18 @@ from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
+from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
+from .wdr import WDRIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
+from .xbef import XBefIE
 from .xhamster import XHamsterIE
 from .xnxx import XNXXIE
 from .xvideos import XVideosIE
-from .xtube import XTubeIE
+from .xtube import XTubeUserIE, XTubeIE
 from .yahoo import (
     YahooIE,
     YahooNewsIE,
```
youtube_dl/extractor/addanime.py:

```diff
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -14,14 +16,14 @@ from ..utils import (
 class AddAnimeIE(InfoExtractor):
 
     _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
-    IE_NAME = u'AddAnime'
     _TEST = {
-        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
-        u'file': u'24MR3YO5SAS9.mp4',
-        u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
-        u'info_dict': {
-            u"description": u"One Piece 606",
-            u"title": u"One Piece 606"
+        'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        'md5': '72954ea10bc979ab5e2eb288b21425a0',
+        'info_dict': {
+            'id': '24MR3YO5SAS9',
+            'ext': 'mp4',
+            'description': 'One Piece 606',
+            'title': 'One Piece 606',
         }
     }
 
@@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor):
             redir_webpage = ee.cause.read().decode('utf-8')
             action = self._search_regex(
                 r'<form id="challenge-form" action="([^"]+)"',
-                redir_webpage, u'Redirect form')
+                redir_webpage, 'Redirect form')
             vc = self._search_regex(
                 r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
-                redir_webpage, u'redirect vc value')
+                redir_webpage, 'redirect vc value')
             av = re.search(
                 r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                 redir_webpage)
@@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor):
             parsed_url = compat_urllib_parse_urlparse(url)
             av_val = av_res + len(parsed_url.netloc)
             confirm_url = (
-                parsed_url.scheme + u'://' + parsed_url.netloc +
+                parsed_url.scheme + '://' + parsed_url.netloc +
                 action + '?' +
                 compat_urllib_parse.urlencode({
                     'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
             self._download_webpage(
                 confirm_url, video_id,
-                note=u'Confirming after redirect')
+                note='Confirming after redirect')
             webpage = self._download_webpage(url, video_id)
 
         formats = []
         for format_id in ('normal', 'hq'):
             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
-            video_url = self._search_regex(rex, webpage, u'video file URLx',
+            video_url = self._search_regex(rex, webpage, 'video file URLx',
                 fatal=False)
             if not video_url:
                 continue
@@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor):
                 'format_id': format_id,
                 'url': video_url,
             })
-        if not formats:
-            raise ExtractorError(u'Cannot find any video format!')
+        self._sort_formats(formats)
         video_title = self._og_search_title(webpage)
         video_description = self._og_search_description(webpage)
 
         return {
             '_type': 'video',
-            'id':  video_id,
+            'id': video_id,
             'formats': formats,
             'title': video_title,
             'description': video_description
```
youtube_dl/extractor/aol.py (new file, 28 lines):

```python
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .fivemin import FiveMinIE


class AolIE(InfoExtractor):
    IE_NAME = 'on.aol.com'
    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'

    _TEST = {
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
            'id': '518167793',
            'ext': 'mp4',
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
        },
        'add_ie': ['FiveMin'],
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        self.to_screen('Downloading 5min.com video %s' % video_id)
        return FiveMinIE._build_result(video_id)
```
youtube_dl/extractor/arte.py:

```diff
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -19,119 +18,46 @@ from ..utils import (
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.
 
-class ArteTvIE(InfoExtractor):
-    _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
-    _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
-    _LIVE_URL = r'index-[0-9]+\.html$'
 
+class ArteTvIE(InfoExtractor):
+    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
     IE_NAME = 'arte.tv'
 
-    @classmethod
-    def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
-
-    # TODO implement Live Stream
-    # from ..utils import compat_urllib_parse
-    # def extractLiveStream(self, url):
-    #     video_lang = url.split('/')[-4]
-    #     info = self.grep_webpage(
-    #         url,
-    #         r'src="(.*?/videothek_js.*?\.js)',
-    #         0,
-    #         [
-    #             (1, 'url', 'Invalid URL: %s' % url)
-    #         ]
-    #     )
-    #     http_host = url.split('/')[2]
-    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
-    #     info = self.grep_webpage(
-    #         next_url,
-    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
-    #         '(http://.*?\.swf).*?' +
-    #         '(rtmp://.*?)\'',
-    #         re.DOTALL,
-    #         [
-    #             (1, 'path', 'could not extract video path: %s' % url),
-    #             (2, 'player', 'could not extract video player: %s' % url),
-    #             (3, 'url', 'could not extract video url: %s' % url)
-    #         ]
-    #     )
-    #     video_url = '%s/%s' % (info.get('url'), info.get('path'))
-
     def _real_extract(self, url):
-        mobj = re.match(self._VIDEOS_URL, url)
-        if mobj is not None:
-            id = mobj.group('id')
-            lang = mobj.group('lang')
-            return self._extract_video(url, id, lang)
+        mobj = re.match(self._VALID_URL, url)
+        lang = mobj.group('lang')
+        video_id = mobj.group('id')
 
-        mobj = re.match(self._LIVEWEB_URL, url)
-        if mobj is not None:
-            name = mobj.group('name')
-            lang = mobj.group('lang')
-            return self._extract_liveweb(url, name, lang)
-
-        if re.search(self._LIVE_URL, url) is not None:
-            raise ExtractorError('Arte live streams are not yet supported, sorry')
-            # self.extractLiveStream(url)
-            # return
-
-        raise ExtractorError('No video found')
-
-    def _extract_video(self, url, video_id, lang):
-        """Extract from videos.arte.tv"""
         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
         ref_xml_doc = self._download_xml(
             ref_xml_url, video_id, note='Downloading metadata')
         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
         config_xml_url = config_node.attrib['ref']
-        config_xml = self._download_webpage(
+        config = self._download_xml(
             config_xml_url, video_id, note='Downloading configuration')
 
-        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
-        def _key(m):
-            quality = m.group('quality')
-            if quality == 'hd':
-                return 2
-            else:
-                return 1
-        # We pick the best quality
-        video_urls = sorted(video_urls, key=_key)
-        video_url = list(video_urls)[-1].group('url')
-
-        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
-        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
-            config_xml, 'thumbnail')
-        return {'id': video_id,
-                'title': title,
-                'thumbnail': thumbnail,
-                'url': video_url,
-                'ext': 'flv',
-                }
+        formats = [{
+            'forma_id': q.attrib['quality'],
+            'url': q.text,
+            'ext': 'flv',
+            'quality': 2 if q.attrib['quality'] == 'hd' else 1,
+        } for q in config.findall('./urls/url')]
+        self._sort_formats(formats)
 
-    def _extract_liveweb(self, url, name, lang):
-        """Extract form http://liveweb.arte.tv/"""
-        webpage = self._download_webpage(url, name)
-        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
-        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
-                                        video_id, 'Downloading information')
-        event_doc = config_doc.find('event')
-        url_node = event_doc.find('video').find('urlHd')
-        if url_node is None:
-            url_node = event_doc.find('urlSd')
-
-        return {'id': video_id,
-                'title': event_doc.find('name%s' % lang.capitalize()).text,
-                'url': url_node.text.replace('MP4', 'mp4'),
-                'ext': 'flv',
-                'thumbnail': self._og_search_thumbnail(webpage),
-                }
+        title = config.find('.//name').text
+        thumbnail = config.find('.//firstThumbnailUrl').text
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
 
 
 class ArteTVPlus7IE(InfoExtractor):
     IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
 
     @classmethod
     def _extract_url_info(cls, url):
@@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
         info_dict = {
@@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
                 l = 'F'
             elif lang == 'de':
                 l = 'A'
+            else:
+                l = lang
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
@@ -202,6 +128,8 @@ class ArteTVPlus7IE(InfoExtractor):
                 re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
                 # The version with sourds/mal subtitles has also lower relevance
                 re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                # Prefer http downloads over m3u8
+                0 if f['url'].endswith('m3u8') else 1,
             )
         formats = sorted(formats, key=sort_key)
         def _format(format_info):
@@ -242,8 +170,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
 
     _TEST = {
         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
-        'file': '050489-002.mp4',
         'info_dict': {
+            'id': '050489-002',
+            'ext': 'mp4',
             'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
         },
     }
@@ -255,8 +184,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
 
     _TEST = {
         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
-        'file': '050940-003.mp4',
         'info_dict': {
+            'id': '050940-003',
+            'ext': 'mp4',
             'title': 'Les champignons au secours de la planète',
         },
     }
@@ -270,7 +200,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
 
 class ArteTVDDCIE(ArteTVPlus7IE):
     IE_NAME = 'arte.tv:ddc'
-    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
 
     def _real_extract(self, url):
         video_id, lang = self._extract_url_info(url)
@@ -284,3 +214,39 @@ class ArteTVDDCIE(ArteTVPlus7IE):
         javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
         json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
         return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:concert'
+    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+    _TEST = {
+        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+        'info_dict': {
+            'id': '186',
+            'ext': 'mp4',
+            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+            'upload_date': '20140128',
+            'description': 'md5:486eb08f991552ade77439fe6d82c305',
+        },
+    }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:embed'
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang')
+        json_url = mobj.group('json_url')
+        return self._extract_from_json_url(json_url, video_id, lang)
```
youtube_dl/extractor/byutv.py (new file, 50 lines):

```python
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
)


class BYUtvIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
        'info_dict': {
            'id': 'granite-flats-talking',
            'ext': 'mp4',
            'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
            'title': 'Talking',
            'thumbnail': 're:^https?://.*promo.*'
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)
        episode_code = self._search_regex(
            r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
        episode_json = re.sub(
            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
        ep = json.loads(episode_json)

        if ep['providerType'] == 'Ooyala':
            return {
                '_type': 'url_transparent',
                'ie_key': 'Ooyala',
                'url': 'ooyala:%s' % ep['providerId'],
                'id': video_id,
                'title': ep['title'],
                'description': ep.get('description'),
                'thumbnail': ep.get('imageThumbnail'),
            }
        else:
            raise ExtractorError('Unsupported provider %s' % ep['provider'])
```
youtube_dl/extractor/cinemassacre.py:

```diff
@@ -9,12 +9,12 @@ from ..utils import (
 
 
 class CinemassacreIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
+    _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
     _TESTS = [
         {
             'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
             'file': '19911.mp4',
-            'md5': 'fde81fbafaee331785f58cd6c0d46190',
+            'md5': '782f8504ca95a0eba8fc9177c373eec7',
             'info_dict': {
                 'upload_date': '20121110',
                 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
@@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor):
         {
             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
             'file': '521be8ef82b16.mp4',
-            'md5': 'd72f10cd39eac4215048f62ab477a511',
+            'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
             'info_dict': {
                 'upload_date': '20131002',
                 'title': 'The Mummy’s Hand (1940)',
@@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, None) # Don't know video id yet
+        webpage = self._download_webpage(url, display_id)
         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
         if not mobj:
@@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor):
         playerdata_url = mobj.group('embed_url')
         video_id = mobj.group('video_id')
 
-        video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
-            webpage, 'title')
-        video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
+        video_title = self._html_search_regex(
+            r'<title>(?P<title>.+?)\|', webpage, 'title')
+        video_description = self._html_search_regex(
+            r'<div class="entry-content">(?P<description>.+?)</div>',
             webpage, 'description', flags=re.DOTALL, fatal=False)
         if len(video_description) == 0:
             video_description = None
 
         playerdata = self._download_webpage(playerdata_url, video_id)
 
-        sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
-        hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
+        sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
+        hd_url = self._html_search_regex(
+            r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
+            default=None)
         video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
 
-        formats = [
-            {
-                'url': sd_url,
-                'ext': 'mp4',
-                'format': 'sd',
-                'format_id': 'sd',
-            },
-            {
-                'url': hd_url,
-                'ext': 'mp4',
-                'format': 'hd',
-                'format_id': 'hd',
-            },
-        ]
+        formats = [{
+            'url': sd_url,
+            'ext': 'mp4',
+            'format': 'sd',
+            'format_id': 'sd',
+            'quality': 1,
+        }]
+        if hd_url:
+            formats.append({
+                'url': hd_url,
+                'ext': 'mp4',
+                'format': 'hd',
+                'format_id': 'hd',
+                'quality': 2,
+            })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
```
@@ -1,22 +1,28 @@
from __future__ import unicode_literals

import re
import time
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils import (
    ExtractorError,
    parse_duration,
)


class ClipfishIE(InfoExtractor):
    IE_NAME = u'clipfish'
    IE_NAME = 'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        u'file': u'3966754.mp4',
        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
            u'title': u'FIFA 14 - E3 2013 Trailer',
            u'duration': 82,
        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        'md5': '2521cd644e862936cf2e698206e47385',
        'info_dict': {
            'id': '3966754',
            'ext': 'mp4',
            'title': 'FIFA 14 - E3 2013 Trailer',
            'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }
@@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
        video_url = doc.find('filename').text
        if video_url is None:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError(u'Cannot find video URL in document %r' %
            raise ExtractorError('Cannot find video URL in document %r' %
                                 xml_bytes)
        thumbnail = doc.find('imageurl').text
        duration_str = doc.find('duration').text
        m = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if m:
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None
        duration = parse_duration(doc.find('duration').text)

        return {
            'id': video_id,
@@ -14,7 +14,7 @@ from ..utils import (


class ComedyCentralIE(MTVServicesInfoExtractor):
    _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
    _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
        (video-clips|episodes|cc-studios|video-collections)
        /(?P<title>.*)'''
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@@ -74,7 +74,7 @@ class InfoExtractor(object):
                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                    * preference Order number of this format. If this field is
                                 present and not None, the formats get sorted
                                 by this field.
                                 by this field, regardless of all other values.
                                 -1 for default (order by other properties),
                                 -2 or smaller for less than default.
                    * quality    Order number of the video quality of this
@@ -10,9 +10,9 @@ from ..utils import (


class CSpanIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
    IE_DESC = 'C-SPAN'
    _TEST = {
    _TESTS = [{
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
@@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }
    }, {
        'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
        # For whatever reason, the served video alternates between
        # two different ones
        #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
        'info_dict': {
            'id': '340723',
            'ext': 'mp4',
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')
        webpage = self._download_webpage(url, page_id)
        video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
        video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')

        description = self._html_search_regex(
            [
@@ -1,25 +1,28 @@
# encoding: utf-8

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
)


class DaumIE(InfoExtractor):
    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'
    IE_NAME = 'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        'info_dict': {
            'id': '52554690',
            'ext': 'mp4',
            'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
            'upload_date': '20130831',
            'duration': 3868,
        },
    }

@@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(
            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
            webpage, u'full id')
            webpage, 'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info = self._download_xml(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
            'Downloading video info')
        urls = self._download_xml(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
            video_id, 'Downloading video formats info')

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
@@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })
43
youtube_dl/extractor/engadget.py
Normal file
@@ -0,0 +1,43 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .fivemin import FiveMinIE
from ..utils import (
    url_basename,
)


class EngadgetIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://www.engadget.com/
        (?:video/5min/(?P<id>\d+)|
            [\d/]+/.*?)
        '''

    _TEST = {
        'url': 'http://www.engadget.com/video/5min/518153925/',
        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
        'info_dict': {
            'id': '518153925',
            'ext': 'mp4',
            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
        },
        'add_ie': ['FiveMin'],
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        if video_id is not None:
            return FiveMinIE._build_result(video_id)
        else:
            title = url_basename(url)
            webpage = self._download_webpage(url, title)
            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
            return {
                '_type': 'playlist',
                'title': title,
                'entries': [FiveMinIE._build_result(id) for id in ids]
            }
youtube_dl/extractor/fivemin.py
Normal file
56
youtube_dl/extractor/fivemin.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
|
||||
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
|
||||
'md5': '4f7b0b79bf1a470e5004f7112385941d',
|
||||
'info_dict': {
|
||||
'id': '518013791',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPad Mini with Retina Display Review',
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _build_result(cls, video_id):
|
||||
return cls.url_result('5min:%s' % video_id, cls.ie_key())
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
|
||||
'playlist=%s&url=https' % video_id,
|
||||
video_id)['binding'][0]
|
||||
|
||||
second_id = compat_str(int(video_id[:-2]) + 1)
|
||||
formats = []
|
||||
for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]:
|
||||
if any(r['ID'] == quality for r in info['Renditions']):
|
||||
formats.append({
|
||||
'format_id': compat_str(quality),
|
||||
'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['Title'],
|
||||
'formats': formats,
|
||||
}
|
@@ -102,6 +102,20 @@ class GenericIE(InfoExtractor):
                'title': '2cc213299525360.mov', # that's what we get
            },
        },
        # second style of embedded ooyala videos
        {
            'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
            'info_dict': {
                'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
                'ext': 'mp4',
                'title': 'Behind-the-scenes: Financial Review Sunday ',
                'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -159,7 +173,45 @@ class GenericIE(InfoExtractor):
                # m3u8 download
                'skip_download': True,
            },
        }
        },
        # Embedded TED video
        {
            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
            'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
            'info_dict': {
                'id': '981',
                'ext': 'mp4',
                'title': 'My web playroom',
                'uploader': 'Ze Frank',
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
            'info_dict': {
                'id': '06e53103ca9aa',
                'ext': 'flv',
                'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
                'description': 'No description',
            },
        },
        # arte embed
        {
            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
            'md5': '7653032cbb25bf6c80d80f217055fa43',
            'info_dict': {
                'id': '048195-004_PLUS7-F',
                'ext': 'flv',
                'title': 'X:enius',
                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
                'upload_date': '20140320',
            },
            'params': {
                'skip_download': 'Requires rtmpdump'
            }
        },
    ]

    def report_download_webpage(self, video_id):
@@ -185,9 +237,14 @@ class GenericIE(InfoExtractor):
            newurl = newurl.replace(' ', '%20')
            newheaders = dict((k,v) for k,v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type"))
            try:
                # This function was deprecated in python 3.3 and removed in 3.4
                origin_req_host = req.get_origin_req_host()
            except AttributeError:
                origin_req_host = req.origin_req_host
            return HEADRequest(newurl,
                               headers=newheaders,
                               origin_req_host=req.get_origin_req_host(),
                               origin_req_host=origin_req_host,
                               unverifiable=True)
        else:
            raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
@@ -306,6 +363,11 @@ class GenericIE(InfoExtractor):
        except compat_xml_parse_error:
            pass

        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows to handle those cases in a generic way
        webpage = compat_urllib_parse.unquote(webpage)

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
@@ -407,9 +469,10 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
        mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
            return OoyalaIE._build_url_result(mobj.group(1))
            return OoyalaIE._build_url_result(mobj.group('ec'))

        # Look for Aparat videos
        mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -471,6 +534,19 @@ class GenericIE(InfoExtractor):
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

        # Look for embedded TED player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')

        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
@@ -482,6 +558,7 @@ class GenericIE(InfoExtractor):
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)

        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
)


class InstagramIE(InfoExtractor):
@@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
            'uploader_id': uploader_id,
            'description': desc,
        }


class InstagramUserIE(InfoExtractor):
    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('username')

        entries = []
        page_count = 0
        media_url = 'http://instagram.com/%s/media' % uploader_id
        while True:
            page = self._download_json(
                media_url, uploader_id,
                note='Downloading page %d ' % (page_count + 1),
            )
            page_count += 1

            for it in page['items']:
                if it.get('type') != 'video':
                    continue
                like_count = int_or_none(it.get('likes', {}).get('count'))
                user = it.get('user', {})

                formats = [{
                    'format_id': k,
                    'height': v.get('height'),
                    'width': v.get('width'),
                    'url': v['url'],
                } for k, v in it['videos'].items()]
                self._sort_formats(formats)

                thumbnails_el = it.get('images', {})
                thumbnail = thumbnails_el.get('thumbnail', {}).get('url')

                title = it.get('caption', {}).get('text', it['id'])

                entries.append({
                    'id': it['id'],
                    'title': title,
                    'formats': formats,
                    'thumbnail': thumbnail,
                    'webpage_url': it.get('link'),
                    'uploader': user.get('full_name'),
                    'uploader_id': user.get('username'),
                    'like_count': like_count,
                    'timestamp': int_or_none(it.get('created_time')),
                })

            if not page['items']:
                break
            max_id = page['items'][-1]['id']
            media_url = (
                'http://instagram.com/%s/media?max_id=%s' % (
                    uploader_id, max_id))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': uploader_id,
            'title': uploader_id,
        }
@@ -48,7 +48,7 @@ class IPrimaIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku.\s*</div>', webpage):
        if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
            raise ExtractorError(
                '%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import int_or_none


class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):

        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
            'video title')
        title = self._html_search_regex(
            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
            webpage)
        mobj = re.search(
            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
            'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None
        view_count = self._html_search_regex(
            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)

        comment_count = None
        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
            fatal=False)
        comment_str = self._html_search_regex(
            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
        if comment_str.startswith('комментариев нет'):
            comment_count = 0
        else:
            mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
            if mobj:
                comment_count = int(mobj.group('total'))
                comment_count = mobj.group('total')

        return {
            'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
        }
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -9,104 +11,103 @@ from ..utils import (
    ExtractorError,
)

class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
class MetacafeIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'
    IE_NAME = 'metacafe'
    _TESTS = [
        # Youtube video
        {
            u"add_ie": ["Youtube"],
            u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
            u"file": u"_aUehQsCQtM.mp4",
            u"info_dict": {
                u"upload_date": u"20090102",
                u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
                u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
                u"uploader": u"PBS",
                u"uploader_id": u"PBS"
            }
        },
        # Normal metacafe video
        {
            u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
            u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
            u'info_dict': {
                u'id': u'11121940',
                u'ext': u'mp4',
                u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
                u'uploader': u'ign',
                u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
        # Youtube video
        {
            'add_ie': ['Youtube'],
            'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
            'info_dict': {
                'id': '_aUehQsCQtM',
                'ext': 'mp4',
                'upload_date': '20090102',
                'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
                'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
                'uploader': 'PBS',
                'uploader_id': 'PBS'
            }
        },
        },
        # AnyClip video
        {
            u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
            u"file": u"an-dVVXnuY7Jh77J.mp4",
            u"info_dict": {
                u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
                u"uploader": u"anyclip",
                u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
        # Normal metacafe video
        {
            'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
            'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
            'info_dict': {
                'id': '11121940',
                'ext': 'mp4',
                'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
                'uploader': 'ign',
                'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
            },
        },
        },
        # age-restricted video
        {
            u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
            u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
            u'info_dict': {
                u'id': u'5186653',
                u'ext': u'mp4',
                u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
                u'uploader': u'Dwayne Pipe',
                u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
                u'age_limit': 18,
        # AnyClip video
        {
            'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
            'info_dict': {
                'id': 'an-dVVXnuY7Jh77J',
                'ext': 'mp4',
                'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
                'uploader': 'anyclip',
                'description': 'md5:38c711dd98f5bb87acf973d573442e67',
            },
        },
        },
        # cbs video
        {
            u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
            u'info_dict': {
                u'id': u'0rOxMBabDXN6',
                u'ext': u'flv',
                u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
                u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
                u'duration': 129,
        # age-restricted video
        {
            'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
            'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
            'info_dict': {
                'id': '5186653',
                'ext': 'mp4',
                'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
                'uploader': 'Dwayne Pipe',
                'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
                'age_limit': 18,
            },
        },
            u'params': {
                # rtmp download
                u'skip_download': True,
        # cbs video
        {
            'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
            'info_dict': {
                'id': '8VD4r_Zws8VP',
                'ext': 'flv',
                'title': 'Open: This is Face the Nation, February 9',
                'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
                'duration': 96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        },
    ]


    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self.to_screen(u'Retrieving disclaimer')
        self.to_screen('Retrieving disclaimer')

    def _real_initialize(self):
        # Retrieve disclaimer
        self.report_disclaimer()
        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
        self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        }
        request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self.report_age_confirmation()
        self._download_webpage(request, None, False, u'Unable to confirm age')
        self._download_webpage(request, None, False, 'Unable to confirm age')

    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
            raise ExtractorError('Invalid URL: %s' % url)

        video_id = mobj.group(1)

@@ -153,23 +154,24 @@ class MetacafeIE(InfoExtractor):
        else:
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                raise ExtractorError(u'Unable to extract media URL')
                raise ExtractorError('Unable to extract media URL')
            vardict = compat_parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                raise ExtractorError(u'Unable to extract media URL')
            mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
                raise ExtractorError('Unable to extract media URL')
            mobj = re.search(
                r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
            if mobj is None:
                raise ExtractorError(u'Unable to extract media URL')
                raise ExtractorError('Unable to extract media URL')
            mediaURL = mobj.group('mediaURL').replace('\\/', '/')
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
            video_ext = determine_ext(video_url)

        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        video_uploader = self._html_search_regex(
            r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
            webpage, u'uploader nickname', fatal=False)
            webpage, 'uploader nickname', fatal=False)

        if re.search(r'"contentRating":"restricted"', webpage) is not None:
            age_limit = 18
@@ -177,14 +179,12 @@ class MetacafeIE(InfoExtractor):
            age_limit = 0

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'id': video_id,
            'url': video_url,
            'description': description,
            'uploader': video_uploader,
            'upload_date': None,
            'title': video_title,
            'title': video_title,
            'thumbnail':thumbnail,
            'ext': video_ext,
            'ext': video_ext,
            'age_limit': age_limit,
        }
@@ -1,6 +1,5 @@
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):

    _TEST = {
        "url": "http://9gag.tv/v/1912",
        "file": "1912.mp4",
        "info_dict": {
            "id": "1912",
            "ext": "mp4",
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
            "view_count": int,
@@ -1,20 +1,23 @@
from __future__ import unicode_literals
import re
import json

from .common import InfoExtractor
from ..utils import unescapeHTML


class OoyalaIE(InfoExtractor):
    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TEST = {
        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
        u'info_dict': {
            u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
        'info_dict': {
            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'ext': 'mp4',
            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
        },
    }

@@ -28,13 +31,14 @@ class OoyalaIE(InfoExtractor):
            ie=cls.ie_key())

    def _extract_result(self, info, more_info):
        return {'id': info['embedCode'],
                'ext': 'mp4',
                'title': unescapeHTML(info['title']),
                'url': info.get('ipad_url') or info['url'],
                'description': unescapeHTML(more_info['description']),
                'thumbnail': more_info['promo'],
                }
        return {
            'id': info['embedCode'],
            'ext': 'mp4',
            'title': unescapeHTML(info['title']),
            'url': info.get('ipad_url') or info['url'],
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -42,22 +46,23 @@ class OoyalaIE(InfoExtractor):
        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
        player = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, u'mobile player url')
                                        player, 'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embedCode)
        videos_info = self._search_regex(
            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
            mobile_player, u'info').replace('\\"','"')
        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
            mobile_player, 'info').replace('\\"','"')
        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
        videos_info = json.loads(videos_info)
        videos_more_info =json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
            return {'_type': 'playlist',
                    'id': embedCode,
                    'title': unescapeHTML(videos_more_info['title']),
                    'entries': videos,
                    }
            return {
                '_type': 'playlist',
                'id': embedCode,
                'title': unescapeHTML(videos_more_info['title']),
                'entries': videos,
            }
        else:
            return self._extract_result(videos_info[0], videos_more_info)
53
youtube_dl/extractor/parliamentliveuk.py
Normal file
@@ -0,0 +1,53 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class ParliamentLiveUKIE(InfoExtractor):
    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    _VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
        'info_dict': {
            'id': '15121',
            'ext': 'asf',
            'title': 'hoc home affairs committee, 18 mar 2014.pm',
            'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
        },
        'params': {
            'skip_download': True, # Requires mplayer (mms)
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        asx_url = self._html_search_regex(
            r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
            'metadata URL')
        asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
        video_url = asx.find('.//REF').attrib['HREF']

        title = self._search_regex(
            r'''(?x)player\.setClipDetails\(
                (?:(?:[0-9]+|"[^"]+"),\s*){2}
                "([^"]+",\s*"[^"]+)"
            ''',
            webpage, 'title').replace('", "', ', ')
        description = self._html_search_regex(
            r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
            webpage, 'description')

        return {
            'id': video_id,
            'ext': 'asf',
            'url': video_url,
            'title': title,
            'description': description,
        }
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import (
    US_RATINGS,
)


class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
           # Article with embedded player
           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
           # Player
           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
        )
    '''

@@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info = self._download_json(info_url, display_id)

        rating_str = info.get('rating')
        if rating_str is not None:
            rating_str = rating_str.rpartition('-')[2]
        age_limit = US_RATINGS.get(rating_str)

        return {
            'id': video_id,
            'title': info['title'],
@@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
            'description': info['program'].get('description'),
            'thumbnail': info.get('image_url'),
            'duration': info.get('duration'),
            'age_limit': age_limit,
        }
@@ -8,6 +8,7 @@ from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
    str_to_int,
)
from ..aes import (
    aes_decrypt_text
@@ -27,6 +28,12 @@ class PornHubIE(InfoExtractor):
        }
    }

    def _extract_count(self, pattern, webpage, name):
        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
        if count:
            count = str_to_int(count)
        return count

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
@@ -37,11 +44,19 @@ class PornHubIE(InfoExtractor):
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
        video_uploader = self._html_search_regex(
            r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
            webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = compat_urllib_parse.unquote(thumbnail)

        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
        comment_count = self._extract_count(
            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')

        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
        if webpage.find('"encrypted":true') != -1:
            password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
@@ -77,6 +92,10 @@ class PornHubIE(InfoExtractor):
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
@@ -1,4 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -6,16 +8,17 @@ from .common import InfoExtractor

class RadioFranceIE(InfoExtractor):
    _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
    IE_NAME = u'radiofrance'
    IE_NAME = 'radiofrance'

    _TEST = {
        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
        u'file': u'one-one.ogg',
        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
        u'info_dict': {
            u"title": u"One to one",
            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            u"uploader": u"Thomas Hercouët",
        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
        'info_dict': {
            'id': 'one-one',
            'ext': 'ogg',
            "title": "One to one",
            "description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
            "uploader": "Thomas Hercouët",
        },
    }

@@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
            webpage, u'description', fatal=False)
            webpage, 'description', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="credit"> © (.*?)</div>',
            webpage, u'uploader', fatal=False)
            webpage, 'uploader', fatal=False)

        formats_str = self._html_search_regex(
            r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
            webpage, u'audio URLs')
            webpage, 'audio URLs')
        formats = [
            {
                'format_id': fm[0],
                'url': fm[1],
                'vcodec': 'none',
                'preference': i,
            }
            for fm in
            re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
            for i, fm in
            enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
        ]
        # No sorting, we don't know any more about these formats
        self._sort_formats(formats)

        return {
            'id': video_id,
63
youtube_dl/extractor/rts.py
Normal file
@@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
    unescapeHTML,
)


class RTSIE(InfoExtractor):
    IE_DESC = 'RTS.ch'
    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'

    _TEST = {
        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
        'md5': '753b877968ad8afaeddccc374d4256a5',
        'info_dict': {
            'id': '3449373',
            'ext': 'mp4',
            'duration': 1488,
            'title': 'Les Enfants Terribles',
            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
            'uploader': 'Divers',
            'upload_date': '19680921',
            'timestamp': -40280400,
            'thumbnail': 're:^https?://.*\.image'
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        all_info = self._download_json(
            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
        info = all_info['video']['JSONinfo']

        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
        duration = parse_duration(info.get('duration'))
        thumbnail = unescapeHTML(info.get('preview_image_url'))
        formats = [{
            'format_id': fid,
            'url': furl,
            'tbr': int_or_none(self._search_regex(
                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
        } for fid, furl in info['streams'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': info['title'],
            'description': info.get('intro'),
            'duration': duration,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
            'thumbnail': thumbnail,
        }
@@ -100,7 +100,7 @@ class SoundcloudIE(InfoExtractor):

    def report_resolve(self, video_id):
        """Report information extraction."""
        self.to_screen(u'%s: Resolving id' % video_id)
        self.to_screen('%s: Resolving id' % video_id)

    @classmethod
    def _resolv_url(cls, url):
@@ -124,45 +124,46 @@ class SoundcloudIE(InfoExtractor):
            'description': info['description'],
            'thumbnail': thumbnail,
        }
        formats = []
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = (
                'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
                    track_id, self._CLIENT_ID))
            result['formats'] = [{
            formats.append({
                'format_id': 'download',
                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
            }]
        else:
            # We have to retrieve the url
            streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
                'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
            stream_json = self._download_webpage(
                streams_url,
                track_id, 'Downloading track url')
                'preference': 10,
            })

            formats = []
            format_dict = json.loads(stream_json)
            for key, stream_url in format_dict.items():
                if key.startswith(u'http'):
                    formats.append({
                        'format_id': key,
                        'ext': ext,
                        'url': stream_url,
                        'vcodec': 'none',
                    })
                elif key.startswith(u'rtmp'):
                    # The url doesn't have an rtmp app, we have to extract the playpath
                    url, path = stream_url.split('mp3:', 1)
                    formats.append({
                        'format_id': key,
                        'url': url,
                        'play_path': 'mp3:' + path,
                        'ext': ext,
                        'vcodec': 'none',
                    })
        # We have to retrieve the url
        streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
            'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
        stream_json = self._download_webpage(
            streams_url,
            track_id, 'Downloading track url')

        format_dict = json.loads(stream_json)
        for key, stream_url in format_dict.items():
            if key.startswith('http'):
                formats.append({
                    'format_id': key,
                    'ext': ext,
                    'url': stream_url,
                    'vcodec': 'none',
                })
            elif key.startswith('rtmp'):
                # The url doesn't have an rtmp app, we have to extract the playpath
                url, path = stream_url.split('mp3:', 1)
                formats.append({
                    'format_id': key,
                    'url': url,
                    'play_path': 'mp3:' + path,
                    'ext': ext,
                    'vcodec': 'none',
                })

        if not formats:
            # We fallback to the stream_url in the original info, this
@@ -188,7 +189,7 @@ class SoundcloudIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
            raise ExtractorError('Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        token = None
@@ -226,7 +227,7 @@ class SoundcloudSetIE(SoundcloudIE):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
            raise ExtractorError('Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
@@ -243,7 +244,7 @@ class SoundcloudSetIE(SoundcloudIE):
        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
            return

        self.report_extraction(full_title)
@@ -11,7 +11,9 @@ from ..utils import (


class TEDIE(SubtitlesInfoExtractor):
    _VALID_URL = r'''(?x)http://www\.ted\.com/
    _VALID_URL = r'''(?x)
        (?P<proto>https?://)
        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
        (
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
        )
        (/lang/(.*?))? # The url may contain the language
        /(?P<name>\w+) # Here goes the name and then ".html"
        .*)$
        '''
    _TEST = {
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url, re.VERBOSE)
        if m.group('type') == 'embed':
            desktop_url = m.group('proto') + 'www' + m.group('urlmain')
            return self.url_result(desktop_url, 'TED')
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url, name)
75
youtube_dl/extractor/toypics.py
Normal file
@@ -0,0 +1,75 @@
from .common import InfoExtractor
import re


class ToypicsIE(InfoExtractor):
    IE_DESC = 'Toypics user profile'
    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
        'md5': '16e806ad6d6f58079d210fe30985e08b',
        'info_dict': {
            'id': '514',
            'ext': 'mp4',
            'title': 'Chance-Bulge\'d, 2',
            'age_limit': 18,
            'uploader': 'kidsune',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        page = self._download_webpage(url, video_id)
        video_url = self._html_search_regex(
            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
        title = self._html_search_regex(
            r'<title>Toypics - ([^<]+)</title>', page, 'title')
        username = self._html_search_regex(
            r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'uploader': username,
            'age_limit': 18,
        }


class ToypicsUserIE(InfoExtractor):
    IE_DESC = 'Toypics user profile'
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        video_count = int(self._search_regex(
            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
            'video count'))

        PAGE_SIZE = 8
        urls = []
        page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
        for n in range(1, page_count + 1):
            lpage_url = url + '/public/%d' % n
            lpage = self._download_webpage(
                lpage_url, username,
                note='Downloading page %d/%d' % (n, page_count))
            urls.extend(
                re.findall(
                    r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">',
                    lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': eurl,
                'ie_key': 'Toypics',
            } for eurl in urls]
        }
@@ -4,26 +4,99 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_request
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VeohIE(InfoExtractor):
|
||||
    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'

    _TEST = {
        'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
        'file': '56314296.mp4',
        'md5': '620e68e6a3cff80086df3348426c9ca3',
        'info_dict': {
            'title': 'Straight Backs Are Stronger',
            'uploader': 'LUMOback',
            'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
    _TESTS = [
        {
            'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
            'md5': '620e68e6a3cff80086df3348426c9ca3',
            'info_dict': {
                'id': '56314296',
                'ext': 'mp4',
                'title': 'Straight Backs Are Stronger',
                'uploader': 'LUMOback',
                'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
            },
        },
        {
            'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
            'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
            'info_dict': {
                'id': '27701988',
                'ext': 'mp4',
                'title': 'Chile workers cover up to avoid skin damage',
                'description': 'md5:2bd151625a60a32822873efc246ba20d',
                'uploader': 'afp-news',
                'duration': 123,
            },
        },
        {
            'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
            'md5': '4fde7b9e33577bab2f2f8f260e30e979',
            'note': 'Embedded ooyala video',
            'info_dict': {
                'id': '69525809',
                'ext': 'mp4',
                'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
                'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
                'uploader': 'newsy-videos',
            },
        },
    ]

    def _extract_formats(self, source):
        formats = []
        link = source.get('aowPermalink')
        if link:
            formats.append({
                'url': link,
                'ext': 'mp4',
                'format_id': 'aow',
            })
        link = source.get('fullPreviewHashLowPath')
        if link:
            formats.append({
                'url': link,
                'format_id': 'low',
            })
        link = source.get('fullPreviewHashHighPath')
        if link:
            formats.append({
                'url': link,
                'format_id': 'high',
            })
        return formats

    def _extract_video(self, source):
        return {
            'id': source.get('videoId'),
            'title': source.get('title'),
            'description': source.get('description'),
            'thumbnail': source.get('highResImage') or source.get('medResImage'),
            'uploader': source.get('username'),
            'duration': int_or_none(source.get('length')),
            'view_count': int_or_none(source.get('views')),
            'age_limit': 18 if source.get('isMature') == 'true' or source.get('isSexy') == 'true' else 0,
            'formats': self._extract_formats(source),
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        if video_id.startswith('v'):
            rsp = self._download_xml(
                r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
            if rsp.get('stat') == 'ok':
                return self._extract_video(rsp.find('./videoList/video'))

        webpage = self._download_webpage(url, video_id)
        age_limit = 0
        if 'class="adultwarning-container"' in webpage:
@@ -33,24 +106,16 @@ class VeohIE(InfoExtractor):
            request.add_header('Cookie', 'confirmedAdult=true')
            webpage = self._download_webpage(request, video_id)

        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
        if m_youtube is not None:
            youtube_id = m_youtube.group(1)
            self.to_screen('%s: detected Youtube video.' % video_id)
            return self.url_result(youtube_id, 'Youtube')

        self.report_extraction(video_id)
        info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
        info = json.loads(info)
        video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
        info = json.loads(
            self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))

        return {
            'id': info['videoId'],
            'title': info['title'],
            'url': video_url,
            'uploader': info['username'],
            'thumbnail': info.get('highResImage') or info.get('medResImage'),
            'description': info['description'],
            'view_count': info['views'],
            'age_limit': age_limit,
        }
        video = self._extract_video(info)
        video['age_limit'] = age_limit

        return video
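The widened Veoh URL pattern is the substantive change here: the old regex only captured the digits after a literal 'v', truncating the permalink, while the new one keeps the whole alphanumeric ID and also accepts 'yapi-' IDs. A minimal sketch of the difference, using the URL from the test above:

    import re

    OLD = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
    NEW = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'

    url = 'http://www.veoh.com/watch/v56314296nk7Zdmz3'
    print(re.match(OLD, url).group('id'))  # '56314296' -- digits only, permalink truncated
    print(re.match(NEW, url).group('id'))  # 'v56314296nk7Zdmz3' -- full ID, 'v' prefix kept

Keeping the prefix is what lets the new _real_extract branch on video_id.startswith('v') to pick the XML API path.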
youtube_dl/extractor/videolecturesnet.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    find_xpath_attr,
    int_or_none,
    parse_duration,
    unified_strdate,
)


class VideoLecturesNetIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
    IE_NAME = 'videolectures.net'

    _TEST = {
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            'thumbnail': 're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
        smil = self._download_xml(smil_url, video_id)

        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
        description = (
            None if description_el is None
            else description_el.attrib['content'])
        upload_date = unified_strdate(
            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])

        switch = smil.find('.//switch')
        duration = parse_duration(switch.attrib.get('dur'))
        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
        thumbnail = (
            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))

        formats = [{
            'url': v.attrib['src'],
            'width': int_or_none(v.attrib.get('width')),
            'height': int_or_none(v.attrib.get('height')),
            'filesize': int_or_none(v.attrib.get('size')),
            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
            'ext': v.attrib.get('ext'),
        } for v in switch.findall('./video')
            if v.attrib.get('proto') == 'http']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnail': thumbnail,
            'formats': formats,
        }
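The new videolectures.net extractor reads everything from the site's SMIL manifest: <meta> entries carry title, abstract, and date, and the <video> children of the <switch> element become formats. A self-contained sketch of that parsing logic, with an invented sample document standing in for the downloaded smil.xml:

    import xml.etree.ElementTree as ET

    # Invented sample resembling http://videolectures.net/<id>/video/1/smil.xml
    smil = ET.fromstring(
        '<smil><body><switch dur="565s">'
        '<video src="http://example.com/v.mp4" width="640" height="360"'
        ' systemBitrate="512000" proto="http" ext="mp4"/>'
        '</switch></body></smil>')

    switch = smil.find('.//switch')
    formats = [{
        'url': v.attrib['src'],
        'width': int(v.attrib['width']),
        'height': int(v.attrib['height']),
        'tbr': int(v.attrib['systemBitrate']) / 1000.0,  # bits/s -> kbit/s
        'ext': v.attrib['ext'],
    } for v in switch.findall('./video') if v.attrib.get('proto') == 'http']
    print(formats[0]['tbr'])  # 512.0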
@@ -1,29 +1,33 @@
from __future__ import unicode_literals

import re

from ..utils import (
    ExtractorError,
    unescapeHTML,
    unified_strdate,
    US_RATINGS,
)
from .subtitles import SubtitlesInfoExtractor


class VikiIE(SubtitlesInfoExtractor):
    IE_NAME = u'viki'
    IE_NAME = 'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'md5': 'a21454021c2646f5433514177e2caa5f',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        u'skip': u'Blocked in the US',
        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
@@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)
            'rating information', default='').strip()
        age_limit = US_RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
            info_url, video_id, note='Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                u'Video %s is blocked from your location.' % video_id,
                'Video %s is blocked from your location.' % video_id,
                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
            r'"created_at":"([^"]+)"', info_webpage, 'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
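The inline RATINGS table is gone: the same mapping now lives in youtube_dl/utils.py as US_RATINGS (added further down in this diff) so other extractors can share it. The lookup behaviour is unchanged:

    US_RATINGS = {
        'G': 0,
        'PG': 10,
        'PG-13': 13,
        'R': 16,
        'NC': 18,
    }

    # dict.get() quietly yields None for pages without a recognised rating,
    # so age_limit simply stays unset instead of raising.
    print(US_RATINGS.get('PG-13'))  # 13
    print(US_RATINGS.get(''))       # None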
youtube_dl/extractor/washingtonpost.py (new file, 103 lines)
@@ -0,0 +1,103 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    strip_jsonp,
)


class WashingtonPostIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
                'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'Breaking Points: The Paper Mine',
                'duration': 1287,
                'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                'uploader': 'The Washington Post',
                'timestamp': 1395527908,
                'upload_date': '20140322',
            },
        }, {
            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
            'info_dict': {
                'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'The town bureaucracy sustains',
                'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
                'duration': 2217,
                'timestamp': 1395528005,
                'upload_date': '20140322',
                'uploader': 'The Washington Post',
            },
        }]
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)
        title = self._og_search_title(webpage)
        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
        entries = []
        for i, uuid in enumerate(uuids, start=1):
            vinfo_all = self._download_json(
                'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
                page_id,
                transform_source=strip_jsonp,
                note='Downloading information of video %d/%d' % (i, len(uuids))
            )
            vinfo = vinfo_all[0]['contentConfig']
            uploader = vinfo.get('credits', {}).get('source')
            timestamp = int_or_none(
                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)

            formats = [{
                'format_id': (
                    '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
                    if s.get('width')
                    else s.get('type')),
                'vbr': s.get('bitrate') if s.get('width') != 0 else None,
                'width': s.get('width'),
                'height': s.get('height'),
                'acodec': s.get('audioCodec'),
                'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
                'filesize': s.get('fileSize'),
                'url': s.get('url'),
                'ext': 'mp4',
                'protocol': {
                    'MP4': 'http',
                    'F4F': 'f4m',
                }.get(s.get('type'))
            } for s in vinfo.get('streams', [])]
            source_media_url = vinfo.get('sourceMediaURL')
            if source_media_url:
                formats.append({
                    'format_id': 'source_media',
                    'url': source_media_url,
                })
            self._sort_formats(formats)
            entries.append({
                'id': uuid,
                'title': vinfo['title'],
                'description': vinfo.get('blurb'),
                'uploader': uploader,
                'formats': formats,
                'duration': int_or_none(vinfo.get('videoDuration'), 100),
                'timestamp': timestamp,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': page_id,
            'title': title,
        }
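The posttv videojson endpoint answers with JSONP, so the extractor passes strip_jsonp (also introduced in utils.py below) as transform_source to peel the callback wrapper off before JSON parsing. A small demonstration with a made-up payload of the expected shape:

    import json
    import re

    def strip_jsonp(code):
        # Same one-liner that this diff adds to youtube_dl/utils.py.
        return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)

    payload = 'callback([{"contentConfig": {"title": "Breaking Points: The Paper Mine"}}]);'
    data = json.loads(strip_jsonp(payload))
    print(data[0]['contentConfig']['title'])  # Breaking Points: The Paper Mine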
@@ -1,3 +1,5 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
@@ -7,14 +9,14 @@ class WorldStarHipHopIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    _TEST = {
        "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
        "file": "wshh6a7q1ny0G34ZwuIO.mp4",
        "md5": "9d04de741161603bf7071bbf4e883186",
        "info_dict": {
            "id": "wshh6a7q1ny0G34ZwuIO",
            "ext": "mp4",
            "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
        }
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
@@ -23,41 +25,32 @@ class WorldStarHipHopIE(InfoExtractor):

        m_vevo_id = re.search(r'videoId=(.*?)&?',
            webpage_src)

        if m_vevo_id is not None:
            self.to_screen(u'Vevo video detected:')
            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

        video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
            webpage_src, u'video URL')
        video_url = self._search_regex(
            r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')

        if 'youtube' in video_url:
            self.to_screen(u'Youtube video detected:')
            return self.url_result(video_url, ie='Youtube')

        if 'mp4' in video_url:
            ext = 'mp4'
        else:
            ext = 'flv'

        video_title = self._html_search_regex(r"<title>(.*)</title>",
            webpage_src, u'title')
        video_title = self._html_search_regex(
            r"<title>(.*)</title>", webpage_src, 'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
            webpage_src, u'thumbnail', fatal=False)

        thumbnail = self._html_search_regex(
            r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
            fatal=False)
        if not thumbnail:
            _title = r"""candytitles.*>(.*)</span>"""
            mobj = re.search(_title, webpage_src)
            if mobj is not None:
                video_title = mobj.group(1)

        results = [{
            'id': video_id,
            'url' : video_url,
            'title' : video_title,
            'thumbnail' : thumbnail,
            'ext' : ext,
        }]
        return results
        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
        }
youtube_dl/extractor/xbef.py (new file, 50 lines)
@@ -0,0 +1,50 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
)


class XBefIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
        'md5': 'a478b565baff61634a98f5e5338be995',
        'info_dict': {
            'id': '5119',
            'ext': 'mp4',
            'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
            'age_limit': 18,
            'thumbnail': 're:^http://.*\.jpg',
        }
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<h1[^>]*>(.*?)</h1>', webpage, 'title')

        config_url_enc = self._download_webpage(
            'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
            note='Retrieving config URL')
        config_url = compat_urllib_parse.unquote(config_url_enc)
        config = self._download_xml(
            config_url, video_id, note='Retrieving config')

        video_url = config.find('./file').text
        thumbnail = config.find('./image').text

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
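The GetVideoURLEncoded endpoint hands the config location back percent-encoded, so the extractor unquotes it before fetching the XML. Illustratively (the encoded string below is made up):

    try:
        from urllib.parse import unquote  # Python 3
    except ImportError:
        from urllib import unquote  # Python 2; compat_urllib_parse.unquote in youtube-dl

    config_url_enc = 'http%3A%2F%2Fxbef.com%2Fplayer_config.xml%3Fid%3D5119'
    print(unquote(config_url_enc))  # http://xbef.com/player_config.xml?id=5119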
@@ -1,11 +1,10 @@
from __future__ import unicode_literals

import os
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    parse_duration,
    str_to_int,
@@ -42,7 +41,6 @@ class XTubeIE(InfoExtractor):
            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
        video_description = self._html_search_regex(
            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
        duration = parse_duration(self._html_search_regex(
            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
        view_count = self._html_search_regex(
@@ -54,12 +52,18 @@ class XTubeIE(InfoExtractor):
        if comment_count:
            comment_count = str_to_int(comment_count)

        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
        format = path.split('/')[5].split('_')[:2]
        format[0] += 'p'
        format[1] += 'k'
        format = "-".join(format)
        player_quality_option = json.loads(self._html_search_regex(
            r'playerQualityOption = ({.+?});', webpage, 'player quality option'))

        QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
        formats = [
            {
                'url': furl,
                'format_id': format_id,
                'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
            } for format_id, furl in player_quality_option.items()
        ]
        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -69,9 +73,42 @@ class XTubeIE(InfoExtractor):
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'url': video_url,
            'ext': extension,
            'format': format,
            'format_id': format,
            'formats': formats,
            'age_limit': 18,
        }


class XTubeUserIE(InfoExtractor):
    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        video_count = int(self._search_regex(
            r'<strong>%s\'s Videos \(([0-9]+)\)</strong>' % username, profile_page,
            'video count'))

        PAGE_SIZE = 25
        urls = []
        page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
        for n in range(1, page_count + 1):
            lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
            lpage = self._download_webpage(
                lpage_url, username,
                note='Downloading page %d/%d' % (n, page_count))
            urls.extend(
                re.findall(r'addthis:url="([^"]+)"', lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': eurl,
                'ie_key': 'XTube',
            } for eurl in urls]
        }
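Instead of deriving a single format from the MP4 URL's path segments, the XTube extractor now reads the page's playerQualityOption object and ranks each entry by its position in the QUALITIES list. A sketch with a made-up quality map:

    QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']

    # Hypothetical playerQualityOption as found in the page's JavaScript.
    player_quality_option = {
        'mp4_normal': 'http://example.com/v_normal.mp4',
        'mp4_720': 'http://example.com/v_720.mp4',
    }

    formats = [{
        'url': furl,
        'format_id': format_id,
        # Unknown ids sort below every known quality.
        'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
    } for format_id, furl in player_quality_option.items()]

    best = max(formats, key=lambda f: f['preference'])
    print(best['format_id'])  # mp4_720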
@@ -1,3 +1,6 @@
from __future__ import unicode_literals


import json
import re
import sys
@@ -17,24 +20,25 @@ from ..aes import (


class YouPornIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
    _TEST = {
        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        u'file': u'505835.mp4',
        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
        u'info_dict': {
            u"upload_date": u"20101221",
            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
            u"uploader": u"Ask Dan And Jennifer",
            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
            u"age_limit": 18,
        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
        'info_dict': {
            'id': '505835',
            'ext': 'mp4',
            'upload_date': '20101221',
            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
            'uploader': 'Ask Dan And Jennifer',
            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        url = 'http://www.' + mobj.group('url')
        url = mobj.group('proto') + 'www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
        age_limit = self._rta_search(webpage)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
        try:
            params = json.loads(json_params)
        except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
        # Get all of the links from the page
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()
            webpage, 'download list').strip()
        LINK_RE = r'<a href="([^"]+)">'
        links = re.findall(LINK_RE, download_list_html)

@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
            resolution = format_parts[0]
            height = int(resolution[:-len('p')])
            bitrate = int(format_parts[1][:-len('k')])
            format = u'-'.join(format_parts) + u'-' + dn
            format = '-'.join(format_parts) + '-' + dn

            formats.append({
                'url': video_url,
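The YouPorn pattern now captures the scheme in a named proto group, so the canonicalised URL keeps whatever scheme the user supplied instead of being forced to http. A quick check:

    import re

    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'

    mobj = re.match(_VALID_URL, 'https://youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/')
    print(mobj.group('proto') + 'www.' + mobj.group('url'))
    # https://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily

Note the scheme is now mandatory: the old pattern made it optional with (?:https?://)?.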
@@ -176,32 +176,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        # 3d videos
        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},

        # Apple HTTP Live Streaming
        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -215,13 +215,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},

        # Dash webm audio
        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')

    def _real_extract(self, url):
        proto = (
            u'http' if self._downloader.params.get('prefer_insecure', False)
            else u'https')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'asv': 3,
            'sts': '1588',
        })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(video_info_url, video_id,
            note=False,
            errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        else:
            age_gate = False
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                    note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'duration': video_duration,
            'age_limit': 18 if age_gate else 0,
            'annotations': video_annotations,
            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
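All of the hard-coded https:// YouTube endpoints above now honour a proto variable, which is the plumbing behind the new --prefer-insecure switch: when set, every request goes over plain http. A compressed sketch of the selection, with params standing in for the downloader's option dict and a made-up video id:

    params = {'prefer_insecure': True}  # what --prefer-insecure sets
    proto = 'http' if params.get('prefer_insecure', False) else 'https'
    print(proto + '://www.youtube.com/watch?v=%s' % 'dQw4w9WgXcQ')
    # http://www.youtube.com/watch?v=dQw4w9WgXcQ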
@@ -1,11 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import calendar
import contextlib
import ctypes
import datetime
import email.utils
import errno
import getpass
import gzip
import itertools
import io
@@ -500,13 +502,13 @@ def orderedSet(iterable):
            res.append(el)
    return res

def unescapeHTML(s):
    """
    @param s a string
    """
    assert type(s) == type(u'')

    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
    return result


@@ -760,8 +762,37 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    https_response = http_response


def parse_iso8601(date_str):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    m = re.search(
        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
        date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group(0))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))

    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
    return calendar.timegm(dt.timetuple())
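The new parse_iso8601 helper turns an ISO 8601 timestamp, with an optional Z or signed HH:MM offset suffix, into a UTC UNIX timestamp; the Washington Post extractor's 'timestamp' field relies on it. For instance:

    # Using parse_iso8601 exactly as defined above:
    print(parse_iso8601('2014-03-22T22:38:28Z'))       # 1395527908
    print(parse_iso8601('2014-03-22T23:38:28+01:00'))  # 1395527908, same instant

Both calls agree because the second timestamp is the same moment expressed one hour ahead of UTC.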

def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    upload_date = None
    #Replace commas
    date_str = date_str.replace(',', ' ')
@@ -1121,11 +1152,11 @@ def setproctitle(title):
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title = title
    buf = ctypes.create_string_buffer(len(title) + 1)
    buf.value = title.encode('utf-8')
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
@@ -1155,7 +1186,7 @@ def parse_duration(s):
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
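The parse_duration regex gains an optional trailing (?::[0-9]+)? group, so durations that carry an extra trailing field such as '00:01:07:28' no longer fail to parse; the extra field is simply ignored. A sketch of the effect (the summing of the matched fields below is reconstructed, since this hunk truncates the function body after the first line):

    import re

    def parse_duration_sketch(s):
        m = re.match(
            r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
        if not m:
            return None
        res = int(m.group('secs'))
        if m.group('mins'):
            res += int(m.group('mins')) * 60
            if m.group('hours'):
                res += int(m.group('hours')) * 60 * 60
        return res

    print(parse_duration_sketch('00:01:07'))     # 67
    print(parse_duration_sketch('00:01:07:28'))  # 67 as well - trailing field ignored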
@@ -1279,3 +1310,25 @@ def parse_xml(s):
    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)


if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        if isinstance(prompt, compat_str):
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass


US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def strip_jsonp(code):
    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
@@ -1,2 +1,2 @@
__version__ = '2014.03.17'
__version__ = '2014.03.24.5'