Compare commits
243 Commits
2014.02.27
...
2013.03.24
Author | SHA1 | Date | |
---|---|---|---|
c577d735c6 | |||
9f0375f61a | |||
5e114e4bfe | |||
83622b6d2f | |||
3d87426c2d | |||
ce328530a9 | |||
f70daac108 | |||
912b38b428 | |||
6e25c58ed7 | |||
51fb2e98d2 | |||
38d63d846e | |||
07cec9776e | |||
ea38e55fff | |||
257cfebfe6 | |||
6eefe53329 | |||
1986025d2b | |||
c9aa111b4f | |||
bfcb6e3917 | |||
2c1396073e | |||
401983c6a0 | |||
391dc3ee07 | |||
be3b8fa30f | |||
9f5809b3e8 | |||
0320ddc192 | |||
56dd55721c | |||
231f76b530 | |||
55442a7812 | |||
43b81eb98a | |||
bfd718793c | |||
a9c2896e22 | |||
278229d195 | |||
fa154d1dbe | |||
7e2ede9891 | |||
74af99fc2f | |||
0f2a2ba14b | |||
e24b5a8610 | |||
750f9020ae | |||
f82863851e | |||
933a5b3792 | |||
aa488e1385 | |||
d77650525d | |||
3e50c29984 | |||
64e7ad6045 | |||
23f4a93bb4 | |||
6f13b055f1 | |||
1f91bd15c3 | |||
11a15be4ce | |||
14e17e18cb | |||
1b124d1942 | |||
747373d4ae | |||
18d367c0a5 | |||
a1a530b067 | |||
cb9722cb3f | |||
773c0b4bb8 | |||
23c322a531 | |||
7e8c0af004 | |||
d2983ccb25 | |||
f24e9833dc | |||
bc2bdf5709 | |||
627a209f74 | |||
1a4895453a | |||
aab74fa106 | |||
2bd9efd4c2 | |||
39a743fb9b | |||
4966a0b22d | |||
fc26023120 | |||
8d7c0cca13 | |||
f66ede4328 | |||
cc88b90ec8 | |||
b6c5fa9a0b | |||
dff10eaa77 | |||
4e6f9aeca1 | |||
e68301af21 | |||
17286a96f2 | |||
0892363e6d | |||
f102372b5f | |||
ecbe1ad207 | |||
9d840c43b5 | |||
6f50f63382 | |||
ff14fc4964 | |||
e125c21531 | |||
93d020dd65 | |||
a7515ec265 | |||
b6c1ceccc2 | |||
4056ad8f36 | |||
6563837ee1 | |||
fd5e6f7ef2 | |||
685052fc7b | |||
15fd51b37c | |||
d95e35d659 | |||
1439073049 | |||
1f7659dbe9 | |||
f1cef7a9ff | |||
8264223511 | |||
bc6d597828 | |||
aba77bbfc2 | |||
955c451456 | |||
e5de3f6c89 | |||
2a1db721d4 | |||
1e0eb60f1a | |||
87a29e6f25 | |||
c3d36f134f | |||
84769e708c | |||
9d2ecdbc71 | |||
9b69af5342 | |||
c21215b421 | |||
cddcfd90b4 | |||
f36aacba0f | |||
355271fb61 | |||
2a5b502364 | |||
98ff9d82d4 | |||
b1ff87224c | |||
b461641fb9 | |||
b047de6f6e | |||
34ca5d9ba0 | |||
60cc4dc4b4 | |||
db95dc13a1 | |||
777ac90791 | |||
04f9bebbcb | |||
4ea3137e41 | |||
a0792b738e | |||
19a41fc613 | |||
3ee52157fb | |||
c4d197ee2d | |||
a33932cfe3 | |||
bcf89ce62c | |||
e3899d0e00 | |||
dcb00da49c | |||
aa51d20d19 | |||
ae7ed92057 | |||
e45b31d9bd | |||
5a25f39653 | |||
963d7ec412 | |||
e712d94adf | |||
6a72423955 | |||
4126826b10 | |||
b773ead7fd | |||
855e2750bc | |||
805ef3c60b | |||
fbc2dcb40b | |||
5375d7ad84 | |||
90f3476180 | |||
ee95c09333 | |||
75d06db9fc | |||
439a1fffcb | |||
9d9d70c462 | |||
b4a186b7be | |||
bdebf51c8f | |||
264b86f9b4 | |||
9e55e37a2e | |||
1471956573 | |||
27865b2169 | |||
6d07ce0162 | |||
edb7fc5435 | |||
31f77343f2 | |||
63ad031583 | |||
957688cee6 | |||
806d6c2e8c | |||
0ef68e04d9 | |||
a496524db2 | |||
935c7360cc | |||
340b046876 | |||
cc1db7f9b7 | |||
a4ff6c4762 | |||
1060425cbb | |||
e9c092f125 | |||
22ff5d2105 | |||
136db7881b | |||
dae313e725 | |||
b74fa8cd2c | |||
94eae04c94 | |||
16ff7ebc77 | |||
c361c505b0 | |||
d37c07c575 | |||
9d6105c9f0 | |||
8dec03ecba | |||
826547870b | |||
52d6a9a61d | |||
ad242b5fbc | |||
3524175625 | |||
7b9965ea93 | |||
0a5bce566f | |||
8012bd2424 | |||
f55a1f0a88 | |||
bacac173a9 | |||
ca1fee34f2 | |||
6dadaa9930 | |||
553f6e4633 | |||
652bee05f0 | |||
d63516e9cd | |||
e477dcf649 | |||
9d3f7781f3 | |||
c7095dada3 | |||
607dbbad76 | |||
17b75c0de1 | |||
ab24f4f3be | |||
e1a52d9e10 | |||
d0ff838433 | |||
b37b94501c | |||
cb3bb2cfef | |||
e2cc7983e9 | |||
c9ae7b9565 | |||
86fb4347f7 | |||
2fcec131f5 | |||
9f62eaf4ef | |||
f92259c026 | |||
0afef30b23 | |||
dcdfd1c711 | |||
2acc1f8f50 | |||
2c39b0c695 | |||
e77c5b4f63 | |||
409a16cb72 | |||
94d5e90b4f | |||
2d73b45805 | |||
271a2dbfa2 | |||
bf4adcac66 | |||
fb8b8fdd62 | |||
5a0b26252e | |||
7d78f0cc48 | |||
f00fc78674 | |||
392017874c | |||
c3cb92d1ab | |||
aa5590fa07 | |||
8cfb5bbf92 | |||
69bb54ebf9 | |||
ca97a56e4b | |||
fc26f3b4c2 | |||
f604c93c64 | |||
dc3727b65c | |||
aba3231de1 | |||
9193bab91d | |||
fbcf3e416d | |||
c0e5d85631 | |||
ca7fa3dcb3 | |||
4ccfba28d9 | |||
abb82f1ddc | |||
546582ec3e | |||
4534485586 | |||
50a138d95c | |||
1b86cc41cf | |||
83cebb8b7a | |||
9e68f9fdf1 | |||
60daf7f0bb |
@ -3,3 +3,5 @@ include test/*.py
|
|||||||
include test/*.json
|
include test/*.json
|
||||||
include youtube-dl.bash-completion
|
include youtube-dl.bash-completion
|
||||||
include youtube-dl.1
|
include youtube-dl.1
|
||||||
|
recursive-include docs *
|
||||||
|
prune docs/_build
|
||||||
|
17
README.md
17
README.md
@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
an empty string (--proxy "") for direct
|
an empty string (--proxy "") for direct
|
||||||
connection
|
connection
|
||||||
--no-check-certificate Suppress HTTPS certificate validation.
|
--no-check-certificate Suppress HTTPS certificate validation.
|
||||||
|
--prefer-insecure Use an unencrypted connection to retrieve
|
||||||
|
information about the video. (Currently
|
||||||
|
supported only for YouTube)
|
||||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||||
can store some downloaded information
|
can store some downloaded information
|
||||||
permanently. By default $XDG_CACHE_HOME
|
permanently. By default $XDG_CACHE_HOME
|
||||||
@ -124,8 +127,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
video id, %(playlist)s for the playlist the
|
video id, %(playlist)s for the playlist the
|
||||||
video is in, %(playlist_index)s for the
|
video is in, %(playlist_index)s for the
|
||||||
position in the playlist and %% for a
|
position in the playlist and %% for a
|
||||||
literal percent. Use - to output to stdout.
|
literal percent. %(height)s and %(width)s
|
||||||
Can also be used to download to a different
|
for the width and height of the video
|
||||||
|
format. %(resolution)s for a textual
|
||||||
|
description of the resolution of the video
|
||||||
|
format. Use - to output to stdout. Can also
|
||||||
|
be used to download to a different
|
||||||
directory, for example with -o '/my/downloa
|
directory, for example with -o '/my/downloa
|
||||||
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||||
--autonumber-size NUMBER Specifies the number of digits in
|
--autonumber-size NUMBER Specifies the number of digits in
|
||||||
@ -187,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
preference using slashes: "-f 22/17/18".
|
preference using slashes: "-f 22/17/18".
|
||||||
"-f mp4" and "-f flv" are also supported.
|
"-f mp4" and "-f flv" are also supported.
|
||||||
You can also use the special names "best",
|
You can also use the special names "best",
|
||||||
"bestaudio", "worst", and "worstaudio". By
|
"bestvideo", "bestaudio", "worst",
|
||||||
default, youtube-dl will pick the best
|
"worstvideo" and "worstaudio". By default,
|
||||||
quality.
|
youtube-dl will pick the best quality.
|
||||||
--all-formats download all available video formats
|
--all-formats download all available video formats
|
||||||
--prefer-free-formats prefer free video formats unless a specific
|
--prefer-free-formats prefer free video formats unless a specific
|
||||||
one is requested
|
one is requested
|
||||||
|
@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
|||||||
git checkout HEAD -- youtube-dl youtube-dl.exe
|
git checkout HEAD -- youtube-dl youtube-dl.exe
|
||||||
|
|
||||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
||||||
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
|
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
||||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
||||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||||
@ -97,7 +97,7 @@ rm -rf build
|
|||||||
|
|
||||||
make pypi-files
|
make pypi-files
|
||||||
echo "Uploading to PyPi ..."
|
echo "Uploading to PyPi ..."
|
||||||
python setup.py sdist upload
|
python setup.py sdist bdist_wheel upload
|
||||||
make clean
|
make clean
|
||||||
|
|
||||||
/bin/echo -e "\n### DONE!"
|
/bin/echo -e "\n### DONE!"
|
||||||
|
1
docs/.gitignore
vendored
Normal file
1
docs/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
_build/
|
177
docs/Makefile
Normal file
177
docs/Makefile
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
# Makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line.
|
||||||
|
SPHINXOPTS =
|
||||||
|
SPHINXBUILD = sphinx-build
|
||||||
|
PAPER =
|
||||||
|
BUILDDIR = _build
|
||||||
|
|
||||||
|
# User-friendly check for sphinx-build
|
||||||
|
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||||
|
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Internal variables.
|
||||||
|
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||||
|
PAPEROPT_letter = -D latex_paper_size=letter
|
||||||
|
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
# the i18n builder cannot share the environment and doctrees with the others
|
||||||
|
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
|
||||||
|
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "Please use \`make <target>' where <target> is one of"
|
||||||
|
@echo " html to make standalone HTML files"
|
||||||
|
@echo " dirhtml to make HTML files named index.html in directories"
|
||||||
|
@echo " singlehtml to make a single large HTML file"
|
||||||
|
@echo " pickle to make pickle files"
|
||||||
|
@echo " json to make JSON files"
|
||||||
|
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||||
|
@echo " qthelp to make HTML files and a qthelp project"
|
||||||
|
@echo " devhelp to make HTML files and a Devhelp project"
|
||||||
|
@echo " epub to make an epub"
|
||||||
|
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||||
|
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||||
|
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
||||||
|
@echo " text to make text files"
|
||||||
|
@echo " man to make manual pages"
|
||||||
|
@echo " texinfo to make Texinfo files"
|
||||||
|
@echo " info to make Texinfo files and run them through makeinfo"
|
||||||
|
@echo " gettext to make PO message catalogs"
|
||||||
|
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||||
|
@echo " xml to make Docutils-native XML files"
|
||||||
|
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
||||||
|
@echo " linkcheck to check all external links for integrity"
|
||||||
|
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf $(BUILDDIR)/*
|
||||||
|
|
||||||
|
html:
|
||||||
|
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||||
|
|
||||||
|
dirhtml:
|
||||||
|
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||||
|
|
||||||
|
singlehtml:
|
||||||
|
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||||
|
|
||||||
|
pickle:
|
||||||
|
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the pickle files."
|
||||||
|
|
||||||
|
json:
|
||||||
|
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the JSON files."
|
||||||
|
|
||||||
|
htmlhelp:
|
||||||
|
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||||
|
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||||
|
|
||||||
|
qthelp:
|
||||||
|
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||||
|
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||||
|
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"
|
||||||
|
|
||||||
|
devhelp:
|
||||||
|
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished."
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
|
||||||
|
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
|
||||||
|
@echo "# devhelp"
|
||||||
|
|
||||||
|
epub:
|
||||||
|
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||||
|
|
||||||
|
latex:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||||
|
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||||
|
"(use \`make latexpdf' here to do that automatically)."
|
||||||
|
|
||||||
|
latexpdf:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo "Running LaTeX files through pdflatex..."
|
||||||
|
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||||
|
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||||
|
|
||||||
|
latexpdfja:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo "Running LaTeX files through platex and dvipdfmx..."
|
||||||
|
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
||||||
|
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||||
|
|
||||||
|
text:
|
||||||
|
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||||
|
|
||||||
|
man:
|
||||||
|
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||||
|
|
||||||
|
texinfo:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||||
|
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||||
|
"(use \`make info' here to do that automatically)."
|
||||||
|
|
||||||
|
info:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo "Running Texinfo files through makeinfo..."
|
||||||
|
make -C $(BUILDDIR)/texinfo info
|
||||||
|
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||||
|
|
||||||
|
gettext:
|
||||||
|
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||||
|
|
||||||
|
changes:
|
||||||
|
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||||
|
@echo
|
||||||
|
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||||
|
|
||||||
|
linkcheck:
|
||||||
|
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||||
|
@echo
|
||||||
|
@echo "Link check complete; look for any errors in the above output " \
|
||||||
|
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||||
|
|
||||||
|
doctest:
|
||||||
|
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||||
|
@echo "Testing of doctests in the sources finished, look at the " \
|
||||||
|
"results in $(BUILDDIR)/doctest/output.txt."
|
||||||
|
|
||||||
|
xml:
|
||||||
|
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
||||||
|
|
||||||
|
pseudoxml:
|
||||||
|
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
71
docs/conf.py
Normal file
71
docs/conf.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# youtube-dl documentation build configuration file, created by
|
||||||
|
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
|
||||||
|
#
|
||||||
|
# This file is execfile()d with the current directory set to its
|
||||||
|
# containing dir.
|
||||||
|
#
|
||||||
|
# Note that not all possible configuration values are present in this
|
||||||
|
# autogenerated file.
|
||||||
|
#
|
||||||
|
# All configuration values have a default; values that are commented out
|
||||||
|
# serve to show the default.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
# Allows to import youtube_dl
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
# -- General configuration ------------------------------------------------
|
||||||
|
|
||||||
|
# Add any Sphinx extension module names here, as strings. They can be
|
||||||
|
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||||
|
# ones.
|
||||||
|
extensions = [
|
||||||
|
'sphinx.ext.autodoc',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
|
templates_path = ['_templates']
|
||||||
|
|
||||||
|
# The suffix of source filenames.
|
||||||
|
source_suffix = '.rst'
|
||||||
|
|
||||||
|
# The master toctree document.
|
||||||
|
master_doc = 'index'
|
||||||
|
|
||||||
|
# General information about the project.
|
||||||
|
project = u'youtube-dl'
|
||||||
|
copyright = u'2014, Ricardo Garcia Gonzalez'
|
||||||
|
|
||||||
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
# |version| and |release|, also used in various other places throughout the
|
||||||
|
# built documents.
|
||||||
|
#
|
||||||
|
# The short X.Y version.
|
||||||
|
import youtube_dl
|
||||||
|
version = youtube_dl.__version__
|
||||||
|
# The full version, including alpha/beta/rc tags.
|
||||||
|
release = version
|
||||||
|
|
||||||
|
# List of patterns, relative to source directory, that match files and
|
||||||
|
# directories to ignore when looking for source files.
|
||||||
|
exclude_patterns = ['_build']
|
||||||
|
|
||||||
|
# The name of the Pygments (syntax highlighting) style to use.
|
||||||
|
pygments_style = 'sphinx'
|
||||||
|
|
||||||
|
# -- Options for HTML output ----------------------------------------------
|
||||||
|
|
||||||
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
|
# a list of builtin themes.
|
||||||
|
html_theme = 'default'
|
||||||
|
|
||||||
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||||
|
html_static_path = ['_static']
|
||||||
|
|
||||||
|
# Output file base name for HTML help builder.
|
||||||
|
htmlhelp_basename = 'youtube-dldoc'
|
23
docs/index.rst
Normal file
23
docs/index.rst
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
Welcome to youtube-dl's documentation!
|
||||||
|
======================================
|
||||||
|
|
||||||
|
*youtube-dl* is a command-line program to download videos from YouTube.com and more sites.
|
||||||
|
It can also be used in Python code.
|
||||||
|
|
||||||
|
Developer guide
|
||||||
|
---------------
|
||||||
|
|
||||||
|
This section contains information for using *youtube-dl* from Python programs.
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
module_guide
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
||||||
|
|
67
docs/module_guide.rst
Normal file
67
docs/module_guide.rst
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
Using the ``youtube_dl`` module
|
||||||
|
===============================
|
||||||
|
|
||||||
|
When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> from youtube_dl import YoutubeDL
|
||||||
|
>>> ydl = YoutubeDL()
|
||||||
|
>>> ydl.add_default_info_extractors()
|
||||||
|
|
||||||
|
Extracting video information
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
|
||||||
|
[youtube] Setting language
|
||||||
|
[youtube] BaW_jenozKc: Downloading webpage
|
||||||
|
[youtube] BaW_jenozKc: Downloading video info webpage
|
||||||
|
[youtube] BaW_jenozKc: Extracting video information
|
||||||
|
>>> info['title']
|
||||||
|
'youtube-dl test video "\'/\\ä↭𝕐'
|
||||||
|
>>> info['height'], info['width']
|
||||||
|
(720, 1280)
|
||||||
|
|
||||||
|
If you want to download or play the video you can get its url:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> info['url']
|
||||||
|
'https://...'
|
||||||
|
|
||||||
|
Extracting playlist information
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The playlist information is extracted in a similar way, but the dictionary is a bit different:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
|
||||||
|
[TED] open_source_open_world: Downloading playlist webpage
|
||||||
|
...
|
||||||
|
>>> playlist['title']
|
||||||
|
'Open-source, open world'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
You can access the videos in the playlist with the ``entries`` field:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
>>> for video in playlist['entries']:
|
||||||
|
... print('Video #%d: %s' % (video['playlist_index'], video['title']))
|
||||||
|
|
||||||
|
Video #1: How Arduino is open-sourcing imagination
|
||||||
|
Video #2: The year open data went worldwide
|
||||||
|
Video #3: Massive-scale online collaboration
|
||||||
|
Video #4: The art of asking
|
||||||
|
Video #5: How cognitive surplus will change the world
|
||||||
|
Video #6: The birth of Wikipedia
|
||||||
|
Video #7: Coding a better government
|
||||||
|
Video #8: The era of open innovation
|
||||||
|
Video #9: The currency of the new economy is trust
|
||||||
|
|
@ -9,7 +9,10 @@ import sys
|
|||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.utils import preferredencoding
|
from youtube_dl.utils import (
|
||||||
|
compat_str,
|
||||||
|
preferredencoding,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_params(override=None):
|
def get_params(override=None):
|
||||||
@ -71,7 +74,7 @@ class FakeYDL(YoutubeDL):
|
|||||||
old_report_warning(message)
|
old_report_warning(message)
|
||||||
self.report_warning = types.MethodType(report_warning, self)
|
self.report_warning = types.MethodType(report_warning, self)
|
||||||
|
|
||||||
def get_testcases():
|
def gettestcases():
|
||||||
for ie in youtube_dl.extractor.gen_extractors():
|
for ie in youtube_dl.extractor.gen_extractors():
|
||||||
t = getattr(ie, '_TEST', None)
|
t = getattr(ie, '_TEST', None)
|
||||||
if t:
|
if t:
|
||||||
@ -83,3 +86,45 @@ def get_testcases():
|
|||||||
|
|
||||||
|
|
||||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def expect_info_dict(self, expected_dict, got_dict):
|
||||||
|
for info_field, expected in expected_dict.items():
|
||||||
|
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
match_str = expected[len('re:'):]
|
||||||
|
match_rex = re.compile(match_str)
|
||||||
|
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, compat_str) and match_rex.match(got),
|
||||||
|
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
|
elif isinstance(expected, type):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
self.assertTrue(isinstance(got, expected),
|
||||||
|
u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
|
||||||
|
else:
|
||||||
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
|
got = 'md5:' + md5(got_dict.get(info_field))
|
||||||
|
else:
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
self.assertEqual(expected, got,
|
||||||
|
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||||
|
|
||||||
|
# Check for the presence of mandatory fields
|
||||||
|
for key in ('id', 'url', 'title', 'ext'):
|
||||||
|
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||||
|
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||||
|
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||||
|
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
||||||
|
|
||||||
|
# Are checkable fields missing from the test case definition?
|
||||||
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
|
for key, value in got_dict.items()
|
||||||
|
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||||
|
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||||
|
if missing_keys:
|
||||||
|
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
||||||
|
self.assertFalse(
|
||||||
|
missing_keys,
|
||||||
|
'Missing keys in test definition: %s' % (
|
||||||
|
', '.join(sorted(missing_keys))))
|
||||||
|
44
test/test_InfoExtractor.py
Normal file
44
test/test_InfoExtractor.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import FakeYDL
|
||||||
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
|
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||||
|
|
||||||
|
|
||||||
|
class TestIE(InfoExtractor):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestInfoExtractor(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.ie = TestIE(FakeYDL())
|
||||||
|
|
||||||
|
def test_ie_key(self):
|
||||||
|
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||||
|
|
||||||
|
def test_html_search_regex(self):
|
||||||
|
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
||||||
|
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
||||||
|
self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
|
||||||
|
|
||||||
|
def test_opengraph(self):
|
||||||
|
ie = self.ie
|
||||||
|
html = '''
|
||||||
|
<meta name="og:title" content='Foo'/>
|
||||||
|
<meta content="Some video's description " name="og:description"/>
|
||||||
|
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||||
|
'''
|
||||||
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
|
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'vid-high')
|
self.assertEqual(downloaded['format_id'], 'vid-high')
|
||||||
|
|
||||||
|
def test_format_selection_video(self):
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
|
||||||
|
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
|
||||||
|
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
|
||||||
|
]
|
||||||
|
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'bestvideo'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'dash-video-high')
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'worstvideo'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||||
|
|
||||||
def test_youtube_format_selection(self):
|
def test_youtube_format_selection(self):
|
||||||
order = [
|
order = [
|
||||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||||
|
@ -9,7 +9,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
from test.helper import get_testcases
|
from test.helper import gettestcases
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
FacebookIE,
|
FacebookIE,
|
||||||
@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
def test_youtube_truncated(self):
|
def test_youtube_truncated(self):
|
||||||
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
||||||
|
|
||||||
|
def test_youtube_search_matching(self):
|
||||||
|
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
|
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_justin_tv_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||||
@ -101,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
for tc in get_testcases():
|
for tc in gettestcases():
|
||||||
url = tc['url']
|
url = tc['url']
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||||
@ -120,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_vimeo_matching(self):
|
def test_vimeo_matching(self):
|
||||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
|
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||||
|
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||||
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||||
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
||||||
@ -135,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
def test_pbs(self):
|
def test_pbs(self):
|
||||||
# https://github.com/rg3/youtube-dl/issues/2350
|
# https://github.com/rg3/youtube-dl/issues/2350
|
||||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||||
|
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -8,17 +8,17 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
get_params,
|
get_params,
|
||||||
get_testcases,
|
gettestcases,
|
||||||
try_rm,
|
expect_info_dict,
|
||||||
md5,
|
md5,
|
||||||
report_warning
|
try_rm,
|
||||||
|
report_warning,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
@ -51,7 +51,7 @@ def _file_md5(fn):
|
|||||||
with open(fn, 'rb') as f:
|
with open(fn, 'rb') as f:
|
||||||
return hashlib.md5(f.read()).hexdigest()
|
return hashlib.md5(f.read()).hexdigest()
|
||||||
|
|
||||||
defs = get_testcases()
|
defs = gettestcases()
|
||||||
|
|
||||||
|
|
||||||
class TestDownload(unittest.TestCase):
|
class TestDownload(unittest.TestCase):
|
||||||
@ -135,36 +135,8 @@ def generator(test_case):
|
|||||||
self.assertEqual(md5_for_file, tc['md5'])
|
self.assertEqual(md5_for_file, tc['md5'])
|
||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
|
||||||
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
|
||||||
got = info_dict.get(info_field)
|
|
||||||
match_str = expected[len('re:'):]
|
|
||||||
match_rex = re.compile(match_str)
|
|
||||||
|
|
||||||
self.assertTrue(
|
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
||||||
isinstance(got, compat_str) and match_rex.match(got),
|
|
||||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
|
||||||
else:
|
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
|
||||||
got = 'md5:' + md5(info_dict.get(info_field))
|
|
||||||
else:
|
|
||||||
got = info_dict.get(info_field)
|
|
||||||
self.assertEqual(expected, got,
|
|
||||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
|
||||||
|
|
||||||
# If checkable fields are missing from the test case, print the info_dict
|
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
|
||||||
for key, value in info_dict.items()
|
|
||||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
|
|
||||||
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
|
|
||||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
|
||||||
|
|
||||||
# Check for the presence of mandatory fields
|
|
||||||
for key in ('id', 'url', 'title', 'ext'):
|
|
||||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
|
||||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
|
||||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
|
||||||
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
|
|
||||||
finally:
|
finally:
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
|
|
||||||
|
@ -9,8 +9,10 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import (
|
||||||
|
expect_info_dict,
|
||||||
|
FakeYDL,
|
||||||
|
)
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
AcademicEarthCourseIE,
|
AcademicEarthCourseIE,
|
||||||
@ -36,6 +38,10 @@ from youtube_dl.extractor import (
|
|||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
GoogleSearchIE,
|
GoogleSearchIE,
|
||||||
GenericIE,
|
GenericIE,
|
||||||
|
TEDIE,
|
||||||
|
ToypicsUserIE,
|
||||||
|
XTubeUserIE,
|
||||||
|
InstagramUserIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -98,7 +104,7 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
|
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], '5124905')
|
self.assertEqual(result['id'], '5124905')
|
||||||
self.assertTrue(len(result['entries']) >= 11)
|
self.assertTrue(len(result['entries']) >= 6)
|
||||||
|
|
||||||
def test_soundcloud_set(self):
|
def test_soundcloud_set(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -248,16 +254,66 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'python language')
|
self.assertEqual(result['id'], 'python language')
|
||||||
self.assertEqual(result['title'], 'python language')
|
self.assertEqual(result['title'], 'python language')
|
||||||
self.assertTrue(len(result['entries']) == 15)
|
self.assertEqual(len(result['entries']), 15)
|
||||||
|
|
||||||
def test_generic_rss_feed(self):
|
def test_generic_rss_feed(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = GenericIE(dl)
|
ie = GenericIE(dl)
|
||||||
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
|
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
|
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
|
||||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
self.assertEqual(result['title'], 'Zero Punctuation')
|
||||||
self.assertTrue(len(result['entries']) > 10)
|
self.assertTrue(len(result['entries']) > 10)
|
||||||
|
|
||||||
|
def test_ted_playlist(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = TEDIE(dl)
|
||||||
|
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], '10')
|
||||||
|
self.assertEqual(result['title'], 'Who are the hackers?')
|
||||||
|
self.assertTrue(len(result['entries']) >= 6)
|
||||||
|
|
||||||
|
def test_toypics_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = ToypicsUserIE(dl)
|
||||||
|
result = ie.extract('http://videos.toypics.net/Mikey')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'Mikey')
|
||||||
|
self.assertTrue(len(result['entries']) >= 17)
|
||||||
|
|
||||||
|
def test_xtube_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = XTubeUserIE(dl)
|
||||||
|
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'greenshowers')
|
||||||
|
self.assertTrue(len(result['entries']) >= 155)
|
||||||
|
|
||||||
|
def test_InstagramUser(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = InstagramUserIE(dl)
|
||||||
|
result = ie.extract('http://instagram.com/porsche')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'porsche')
|
||||||
|
self.assertTrue(len(result['entries']) >= 2)
|
||||||
|
test_video = next(
|
||||||
|
e for e in result['entries']
|
||||||
|
if e['id'] == '614605558512799803_462752227')
|
||||||
|
dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
|
||||||
|
dl.process_video_result(test_video, download=False)
|
||||||
|
EXPECTED = {
|
||||||
|
'id': '614605558512799803_462752227',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#Porsche Intelligent Performance.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'uploader': 'Porsche',
|
||||||
|
'uploader_id': 'porsche',
|
||||||
|
'timestamp': 1387486713,
|
||||||
|
'upload_date': '20131219',
|
||||||
|
}
|
||||||
|
expect_info_dict(self, EXPECTED, test_video)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -33,7 +33,9 @@ from youtube_dl.utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
urlencode_postdata,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
@ -261,5 +263,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
bam''')
|
bam''')
|
||||||
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
||||||
|
|
||||||
|
def test_urlencode_postdata(self):
|
||||||
|
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||||
|
self.assertTrue(isinstance(data, bytes))
|
||||||
|
|
||||||
|
def test_parse_iso8601(self):
|
||||||
|
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
|
||||||
|
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||||
|
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
|||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertTrue(len(entries) >= 5)
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
|
def test_youtube_search_url(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubeSearchURLIE(dl)
|
||||||
|
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||||
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -1,4 +0,0 @@
|
|||||||
# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
|
|
||||||
|
|
||||||
from .extractor.common import InfoExtractor, SearchInfoExtractor
|
|
||||||
from .extractor import gen_extractors, get_info_extractor
|
|
@ -4,6 +4,7 @@
|
|||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
@ -147,6 +148,8 @@ class YoutubeDL(object):
|
|||||||
again.
|
again.
|
||||||
cookiefile: File name where cookies should be read from and dumped to.
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
nocheckcertificate:Do not verify SSL certificates
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
|
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||||
|
At the moment, this is only supported by YouTube.
|
||||||
proxy: URL of the proxy server to use
|
proxy: URL of the proxy server to use
|
||||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||||
@ -370,12 +373,15 @@ class YoutubeDL(object):
|
|||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if self.params.get('logger') is not None:
|
||||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
self.params['logger'].warning(message)
|
||||||
else:
|
else:
|
||||||
_msg_header = 'WARNING:'
|
if self._err_file.isatty() and os.name != 'nt':
|
||||||
warning_message = '%s %s' % (_msg_header, message)
|
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||||
self.to_stderr(warning_message)
|
else:
|
||||||
|
_msg_header = 'WARNING:'
|
||||||
|
warning_message = '%s %s' % (_msg_header, message)
|
||||||
|
self.to_stderr(warning_message)
|
||||||
|
|
||||||
def report_error(self, message, tb=None):
|
def report_error(self, message, tb=None):
|
||||||
'''
|
'''
|
||||||
@ -409,6 +415,13 @@ class YoutubeDL(object):
|
|||||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||||
if template_dict.get('playlist_index') is not None:
|
if template_dict.get('playlist_index') is not None:
|
||||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
||||||
|
if template_dict.get('resolution') is None:
|
||||||
|
if template_dict.get('width') and template_dict.get('height'):
|
||||||
|
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||||
|
elif template_dict.get('height'):
|
||||||
|
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||||
|
elif template_dict.get('width'):
|
||||||
|
template_dict['resolution'] = '?x%d' % template_dict['width']
|
||||||
|
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
compat_str(v),
|
compat_str(v),
|
||||||
@ -499,13 +512,7 @@ class YoutubeDL(object):
|
|||||||
'_type': 'compat_list',
|
'_type': 'compat_list',
|
||||||
'entries': ie_result,
|
'entries': ie_result,
|
||||||
}
|
}
|
||||||
self.add_extra_info(ie_result,
|
self.add_default_extra_info(ie_result, ie, url)
|
||||||
{
|
|
||||||
'extractor': ie.IE_NAME,
|
|
||||||
'webpage_url': url,
|
|
||||||
'webpage_url_basename': url_basename(url),
|
|
||||||
'extractor_key': ie.ie_key(),
|
|
||||||
})
|
|
||||||
if process:
|
if process:
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
@ -522,7 +529,15 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
self.report_error('no suitable InfoExtractor: %s' % url)
|
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||||
|
|
||||||
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
|
self.add_extra_info(ie_result, {
|
||||||
|
'extractor': ie.IE_NAME,
|
||||||
|
'webpage_url': url,
|
||||||
|
'webpage_url_basename': url_basename(url),
|
||||||
|
'extractor_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
|
||||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
"""
|
"""
|
||||||
@ -656,6 +671,18 @@ class YoutubeDL(object):
|
|||||||
if f.get('vcodec') == 'none']
|
if f.get('vcodec') == 'none']
|
||||||
if audio_formats:
|
if audio_formats:
|
||||||
return audio_formats[0]
|
return audio_formats[0]
|
||||||
|
elif format_spec == 'bestvideo':
|
||||||
|
video_formats = [
|
||||||
|
f for f in available_formats
|
||||||
|
if f.get('acodec') == 'none']
|
||||||
|
if video_formats:
|
||||||
|
return video_formats[-1]
|
||||||
|
elif format_spec == 'worstvideo':
|
||||||
|
video_formats = [
|
||||||
|
f for f in available_formats
|
||||||
|
if f.get('acodec') == 'none']
|
||||||
|
if video_formats:
|
||||||
|
return video_formats[0]
|
||||||
else:
|
else:
|
||||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||||
if format_spec in extensions:
|
if format_spec in extensions:
|
||||||
@ -675,6 +702,14 @@ class YoutubeDL(object):
|
|||||||
info_dict['playlist'] = None
|
info_dict['playlist'] = None
|
||||||
info_dict['playlist_index'] = None
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
|
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||||
|
upload_date = datetime.datetime.utcfromtimestamp(
|
||||||
|
info_dict['timestamp'])
|
||||||
|
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||||
|
|
||||||
# This extractors handle format selection themselves
|
# This extractors handle format selection themselves
|
||||||
if info_dict['extractor'] in ['Youku']:
|
if info_dict['extractor'] in ['Youku']:
|
||||||
if download:
|
if download:
|
||||||
@ -688,8 +723,11 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
formats = info_dict['formats']
|
formats = info_dict['formats']
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('No video formats found!')
|
||||||
|
|
||||||
# We check that all the formats have the format and format_id fields
|
# We check that all the formats have the format and format_id fields
|
||||||
for (i, format) in enumerate(formats):
|
for i, format in enumerate(formats):
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
if format.get('format') is None:
|
if format.get('format') is None:
|
||||||
@ -908,7 +946,7 @@ class YoutubeDL(object):
|
|||||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
||||||
(info_dict['extractor'], info_dict['id']))
|
(info_dict['extractor'], info_dict['id']))
|
||||||
try:
|
try:
|
||||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
uf = self.urlopen(info_dict['thumbnail'])
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
with open(thumb_filename, 'wb') as thumbf:
|
||||||
shutil.copyfileobj(uf, thumbf)
|
shutil.copyfileobj(uf, thumbf)
|
||||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||||
@ -1154,7 +1192,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
return self._opener.open(req)
|
return self._opener.open(req, timeout=self._socket_timeout)
|
||||||
|
|
||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
if not self.params.get('verbose'):
|
if not self.params.get('verbose'):
|
||||||
@ -1185,7 +1223,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def _setup_opener(self):
|
def _setup_opener(self):
|
||||||
timeout_val = self.params.get('socket_timeout')
|
timeout_val = self.params.get('socket_timeout')
|
||||||
timeout = 600 if timeout_val is None else float(timeout_val)
|
self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
|
||||||
|
|
||||||
opts_cookiefile = self.params.get('cookiefile')
|
opts_cookiefile = self.params.get('cookiefile')
|
||||||
opts_proxy = self.params.get('proxy')
|
opts_proxy = self.params.get('proxy')
|
||||||
@ -1223,7 +1261,3 @@ class YoutubeDL(object):
|
|||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
opener.addheaders = []
|
opener.addheaders = []
|
||||||
self._opener = opener
|
self._opener = opener
|
||||||
|
|
||||||
# TODO remove this global modification
|
|
||||||
compat_urllib_request.install_opener(opener)
|
|
||||||
socket.setdefaulttimeout(timeout)
|
|
||||||
|
@ -48,12 +48,14 @@ __authors__ = (
|
|||||||
'Niklas Laxström',
|
'Niklas Laxström',
|
||||||
'David Triendl',
|
'David Triendl',
|
||||||
'Anthony Weems',
|
'Anthony Weems',
|
||||||
|
'David Wagner',
|
||||||
|
'Juan C. Olivares',
|
||||||
|
'Mattias Harrysson',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import getpass
|
|
||||||
import io
|
import io
|
||||||
import locale
|
import locale
|
||||||
import optparse
|
import optparse
|
||||||
@ -65,6 +67,7 @@ import sys
|
|||||||
|
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
compat_getpass,
|
||||||
compat_print,
|
compat_print,
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
@ -234,6 +237,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
|
general.add_option(
|
||||||
|
'--prefer-insecure', action='store_true', dest='prefer_insecure',
|
||||||
|
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||||
@ -254,7 +260,6 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_true',
|
action='store_true',
|
||||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
dest='playliststart', metavar='NUMBER', default=1, type=int,
|
dest='playliststart', metavar='NUMBER', default=1, type=int,
|
||||||
@ -313,7 +318,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
video_format.add_option('-f', '--format',
|
video_format.add_option('-f', '--format',
|
||||||
action='store', dest='format', metavar='FORMAT', default=None,
|
action='store', dest='format', metavar='FORMAT', default=None,
|
||||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.')
|
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
|
||||||
video_format.add_option('--all-formats',
|
video_format.add_option('--all-formats',
|
||||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||||
video_format.add_option('--prefer-free-formats',
|
video_format.add_option('--prefer-free-formats',
|
||||||
@ -428,6 +433,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||||
|
'%(height)s and %(width)s for the width and height of the video format. '
|
||||||
|
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
@ -606,7 +613,7 @@ def _real_main(argv=None):
|
|||||||
if opts.usetitle and opts.useid:
|
if opts.usetitle and opts.useid:
|
||||||
parser.error(u'using title conflicts with using video ID')
|
parser.error(u'using title conflicts with using video ID')
|
||||||
if opts.username is not None and opts.password is None:
|
if opts.username is not None and opts.password is None:
|
||||||
opts.password = getpass.getpass(u'Type account password and press return:')
|
opts.password = compat_getpass(u'Type account password and press [Return]: ')
|
||||||
if opts.ratelimit is not None:
|
if opts.ratelimit is not None:
|
||||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||||
if numeric_limit is None:
|
if numeric_limit is None:
|
||||||
@ -751,6 +758,7 @@ def _real_main(argv=None):
|
|||||||
'download_archive': download_archive_fn,
|
'download_archive': download_archive_fn,
|
||||||
'cookiefile': opts.cookiefile,
|
'cookiefile': opts.cookiefile,
|
||||||
'nocheckcertificate': opts.no_check_certificate,
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
|
'prefer_insecure': opts.prefer_insecure,
|
||||||
'proxy': opts.proxy,
|
'proxy': opts.proxy,
|
||||||
'socket_timeout': opts.socket_timeout,
|
'socket_timeout': opts.socket_timeout,
|
||||||
'bidi_workaround': opts.bidi_workaround,
|
'bidi_workaround': opts.bidi_workaround,
|
||||||
|
@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
|
|||||||
while count <= retries:
|
while count <= retries:
|
||||||
# Establish connection
|
# Establish connection
|
||||||
try:
|
try:
|
||||||
data = compat_urllib_request.urlopen(request)
|
data = self.ydl.urlopen(request)
|
||||||
break
|
break
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||||
@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
|
|||||||
# Unable to resume (requested range not satisfiable)
|
# Unable to resume (requested range not satisfiable)
|
||||||
try:
|
try:
|
||||||
# Open the connection again without the range header
|
# Open the connection again without the range header
|
||||||
data = compat_urllib_request.urlopen(basic_request)
|
data = self.ydl.urlopen(basic_request)
|
||||||
content_length = data.info()['Content-Length']
|
content_length = data.info()['Content-Length']
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if err.code < 500 or err.code >= 600:
|
if err.code < 500 or err.code >= 600:
|
||||||
@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
|
|||||||
else:
|
else:
|
||||||
# The length does not match, we start the download over
|
# The length does not match, we start the download over
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
|
resume_len = 0
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
break
|
break
|
||||||
# Retry
|
# Retry
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -22,7 +24,7 @@ class RtmpFD(FileDownloader):
|
|||||||
proc_stderr_closed = False
|
proc_stderr_closed = False
|
||||||
while not proc_stderr_closed:
|
while not proc_stderr_closed:
|
||||||
# read line from stderr
|
# read line from stderr
|
||||||
line = u''
|
line = ''
|
||||||
while True:
|
while True:
|
||||||
char = proc.stderr.read(1)
|
char = proc.stderr.read(1)
|
||||||
if not char:
|
if not char:
|
||||||
@ -46,7 +48,7 @@ class RtmpFD(FileDownloader):
|
|||||||
data_len = None
|
data_len = None
|
||||||
if percent > 0:
|
if percent > 0:
|
||||||
data_len = int(downloaded_data_len * 100 / percent)
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
data_len_str = u'~' + format_bytes(data_len)
|
data_len_str = '~' + format_bytes(data_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
cursor_in_new_line = False
|
cursor_in_new_line = False
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
@ -76,12 +78,12 @@ class RtmpFD(FileDownloader):
|
|||||||
})
|
})
|
||||||
elif self.params.get('verbose', False):
|
elif self.params.get('verbose', False):
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen(u'')
|
self.to_screen('')
|
||||||
cursor_in_new_line = True
|
cursor_in_new_line = True
|
||||||
self.to_screen(u'[rtmpdump] '+line)
|
self.to_screen('[rtmpdump] '+line)
|
||||||
proc.wait()
|
proc.wait()
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen(u'')
|
self.to_screen('')
|
||||||
return proc.returncode
|
return proc.returncode
|
||||||
|
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
@ -102,7 +104,7 @@ class RtmpFD(FileDownloader):
|
|||||||
try:
|
try:
|
||||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
@ -127,7 +129,7 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
if conn:
|
if conn:
|
||||||
basic_args += ['--conn', conn]
|
basic_args += ['--conn', conn]
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||||
|
|
||||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
# Windows subprocess module does not actually support Unicode
|
# Windows subprocess module does not actually support Unicode
|
||||||
@ -150,26 +152,35 @@ class RtmpFD(FileDownloader):
|
|||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
except ImportError:
|
except ImportError:
|
||||||
shell_quote = repr
|
shell_quote = repr
|
||||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||||
|
|
||||||
|
RD_SUCCESS = 0
|
||||||
|
RD_FAILED = 1
|
||||||
|
RD_INCOMPLETE = 2
|
||||||
|
RD_NO_CONNECT = 3
|
||||||
|
|
||||||
retval = run_rtmpdump(args)
|
retval = run_rtmpdump(args)
|
||||||
|
|
||||||
while (retval == 2 or retval == 1) and not test:
|
if retval == RD_NO_CONNECT:
|
||||||
|
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||||
|
return False
|
||||||
|
|
||||||
|
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
|
||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == 1:
|
if prevsize == cursize and retval == RD_FAILED:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = 0
|
retval = RD_SUCCESS
|
||||||
break
|
break
|
||||||
if retval == 0 or (test and retval == 2):
|
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
@ -179,6 +190,6 @@ class RtmpFD(FileDownloader):
|
|||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.to_stderr(u"\n")
|
self.to_stderr('\n')
|
||||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
self.report_error('rtmpdump exited with code %d' % retval)
|
||||||
return False
|
return False
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from .academicearth import AcademicEarthCourseIE
|
from .academicearth import AcademicEarthCourseIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
|
from .aftonbladet import AftonbladetIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
|
from .aol import AolIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
@ -9,6 +11,7 @@ from .arte import (
|
|||||||
ArteTvIE,
|
ArteTvIE,
|
||||||
ArteTVPlus7IE,
|
ArteTVPlus7IE,
|
||||||
ArteTVCreativeIE,
|
ArteTVCreativeIE,
|
||||||
|
ArteTVConcertIE,
|
||||||
ArteTVFutureIE,
|
ArteTVFutureIE,
|
||||||
ArteTVDDCIE,
|
ArteTVDDCIE,
|
||||||
)
|
)
|
||||||
@ -23,9 +26,11 @@ from .br import BRIE
|
|||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .canal13cl import Canal13clIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
@ -50,7 +55,6 @@ from .dailymotion import (
|
|||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
)
|
)
|
||||||
from .daum import DaumIE
|
from .daum import DaumIE
|
||||||
from .depositfiles import DepositFilesIE
|
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
@ -61,6 +65,7 @@ from .ehow import EHowIE
|
|||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
|
from .engadget import EngadgetIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
@ -69,6 +74,7 @@ from .facebook import FacebookIE
|
|||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
from .firstpost import FirstpostIE
|
from .firstpost import FirstpostIE
|
||||||
from .firsttv import FirstTVIE
|
from .firsttv import FirstTVIE
|
||||||
|
from .fivemin import FiveMinIE
|
||||||
from .fktv import (
|
from .fktv import (
|
||||||
FKTVIE,
|
FKTVIE,
|
||||||
FKTVPosteckeIE,
|
FKTVPosteckeIE,
|
||||||
@ -89,6 +95,7 @@ from .funnyordie import FunnyOrDieIE
|
|||||||
from .gamekings import GamekingsIE
|
from .gamekings import GamekingsIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
@ -105,7 +112,7 @@ from .imdb import (
|
|||||||
)
|
)
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
from .iprima import IPrimaIE
|
from .iprima import IPrimaIE
|
||||||
from .ivi import (
|
from .ivi import (
|
||||||
@ -133,6 +140,7 @@ from .lynda import (
|
|||||||
)
|
)
|
||||||
from .m6 import M6IE
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
from .macgamestore import MacGameStoreIE
|
||||||
|
from .mailru import MailRuIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
@ -169,8 +177,10 @@ from .nowness import NownessIE
|
|||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
@ -185,12 +195,14 @@ from .ro220 import Ro220IE
|
|||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
|
from .rts import RTSIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
|
from .rutv import RUTVIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
@ -228,6 +240,7 @@ from .theplatform import ThePlatformIE
|
|||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
from .trutube import TruTubeIE
|
from .trutube import TruTubeIE
|
||||||
@ -235,7 +248,12 @@ from .tube8 import Tube8IE
|
|||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE
|
from .tvp import TvpIE
|
||||||
|
from .udemy import (
|
||||||
|
UdemyIE,
|
||||||
|
UdemyCourseIE
|
||||||
|
)
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
@ -247,6 +265,7 @@ from .vice import ViceIE
|
|||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
from .videobam import VideoBamIE
|
from .videobam import VideoBamIE
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
|
from .videolecturesnet import VideoLecturesNetIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
@ -262,14 +281,16 @@ from .viki import VikiIE
|
|||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
|
from .wdr import WDRIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
|
from .xbef import XBefIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .xtube import XTubeIE
|
from .xtube import XTubeUserIE, XTubeIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
YahooNewsIE,
|
YahooNewsIE,
|
||||||
@ -280,19 +301,20 @@ from .youku import YoukuIE
|
|||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
|
||||||
YoutubeSearchIE,
|
|
||||||
YoutubeSearchDateIE,
|
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
|
||||||
YoutubeRecommendedIE,
|
|
||||||
YoutubeTruncatedURLIE,
|
|
||||||
YoutubeWatchLaterIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
|
YoutubePlaylistIE,
|
||||||
|
YoutubeRecommendedIE,
|
||||||
|
YoutubeSearchDateIE,
|
||||||
|
YoutubeSearchIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
|
YoutubeShowIE,
|
||||||
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeTruncatedURLIE,
|
||||||
|
YoutubeUserIE,
|
||||||
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
69
youtube_dl/extractor/aftonbladet.py
Normal file
69
youtube_dl/extractor/aftonbladet.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class AftonbladetIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'article36015',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||||
|
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||||
|
'upload_date': '20140306',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.search(self._VALID_URL, url)
|
||||||
|
|
||||||
|
video_id = mobj.group('video_id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# find internal video meta data
|
||||||
|
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||||
|
internal_meta_id = self._html_search_regex(
|
||||||
|
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
|
||||||
|
internal_meta_url = META_URL % internal_meta_id
|
||||||
|
internal_meta_json = self._download_json(
|
||||||
|
internal_meta_url, video_id, 'Downloading video meta data')
|
||||||
|
|
||||||
|
# find internal video formats
|
||||||
|
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||||
|
internal_video_id = internal_meta_json['videoId']
|
||||||
|
internal_formats_url = FORMATS_URL % internal_video_id
|
||||||
|
internal_formats_json = self._download_json(
|
||||||
|
internal_formats_url, video_id, 'Downloading video formats')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
|
||||||
|
p = fmt['paths'][0]
|
||||||
|
formats.append({
|
||||||
|
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': fmt['width'],
|
||||||
|
'height': fmt['height'],
|
||||||
|
'tbr': fmt['bitrate'],
|
||||||
|
'protocol': 'http',
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
|
||||||
|
upload_date = timestamp.strftime('%Y%m%d')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': internal_meta_json['title'],
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': internal_meta_json['imageUrl'],
|
||||||
|
'description': internal_meta_json['shortPreamble'],
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': internal_meta_json['duration'],
|
||||||
|
'view_count': internal_meta_json['views'],
|
||||||
|
}
|
28
youtube_dl/extractor/aol.py
Normal file
28
youtube_dl/extractor/aol.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .fivemin import FiveMinIE
|
||||||
|
|
||||||
|
|
||||||
|
class AolIE(InfoExtractor):
|
||||||
|
IE_NAME = 'on.aol.com'
|
||||||
|
_VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||||
|
'md5': '18ef68f48740e86ae94b98da815eec42',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '518167793',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
|
||||||
|
},
|
||||||
|
'add_ie': ['FiveMin'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
self.to_screen('Downloading 5min.com video %s' % video_id)
|
||||||
|
return FiveMinIE._build_result(video_id)
|
@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
|
|||||||
return self._extract_liveweb(url, name, lang)
|
return self._extract_liveweb(url, name, lang)
|
||||||
|
|
||||||
if re.search(self._LIVE_URL, url) is not None:
|
if re.search(self._LIVE_URL, url) is not None:
|
||||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
raise ExtractorError('Arte live streams are not yet supported, sorry')
|
||||||
# self.extractLiveStream(url)
|
# self.extractLiveStream(url)
|
||||||
# return
|
# return
|
||||||
|
|
||||||
|
raise ExtractorError('No video found')
|
||||||
|
|
||||||
def _extract_video(self, url, video_id, lang):
|
def _extract_video(self, url, video_id, lang):
|
||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml_doc = self._download_xml(
|
||||||
|
ref_xml_url, video_id, note='Downloading metadata')
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(
|
||||||
|
config_xml_url, video_id, note='Downloading configuration')
|
||||||
|
|
||||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||||
def _key(m):
|
def _key(m):
|
||||||
@ -127,7 +131,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
|
|
||||||
class ArteTVPlus7IE(InfoExtractor):
|
class ArteTVPlus7IE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
IE_NAME = 'arte.tv:+7'
|
||||||
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_url_info(cls, url):
|
def _extract_url_info(cls, url):
|
||||||
@ -198,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||||
# The version with sourds/mal subtitles has also lower relevance
|
# The version with sourds/mal subtitles has also lower relevance
|
||||||
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||||
|
# Prefer http downloads over m3u8
|
||||||
|
0 if f['url'].endswith('m3u8') else 1,
|
||||||
)
|
)
|
||||||
formats = sorted(formats, key=sort_key)
|
formats = sorted(formats, key=sort_key)
|
||||||
def _format(format_info):
|
def _format(format_info):
|
||||||
@ -238,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||||
'file': '050489-002.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '050489-002',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -251,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
|
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
|
||||||
'file': '050940-003.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '050940-003',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Les champignons au secours de la planète',
|
'title': 'Les champignons au secours de la planète',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -266,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
|||||||
|
|
||||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||||
IE_NAME = 'arte.tv:ddc'
|
IE_NAME = 'arte.tv:ddc'
|
||||||
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, lang = self._extract_url_info(url)
|
video_id, lang = self._extract_url_info(url)
|
||||||
@ -280,3 +288,20 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
|||||||
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
||||||
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVConcertIE(ArteTVPlus7IE):
|
||||||
|
IE_NAME = 'arte.tv:concert'
|
||||||
|
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||||
|
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '186',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
|
||||||
|
'upload_date': '20140128',
|
||||||
|
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
@ -9,21 +9,35 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
class BRIE(InfoExtractor):
|
class BRIE(InfoExtractor):
|
||||||
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||||
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||||
_BASE_URL = "http://www.br.de"
|
_BASE_URL = "http://www.br.de"
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
{
|
||||||
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||||
"info_dict": {
|
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||||
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
"info_dict": {
|
||||||
"ext": "mp4",
|
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||||
"title": "Feiern und Verzichten",
|
"ext": "mp4",
|
||||||
"description": "Anselm Grün: Feiern und Verzichten",
|
"title": "Feiern und Verzichten",
|
||||||
"uploader": "BR/Birgit Baier",
|
"description": "Anselm Grün: Feiern und Verzichten",
|
||||||
"upload_date": "20140301"
|
"uploader": "BR/Birgit Baier",
|
||||||
|
"upload_date": "20140301"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
|
||||||
|
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Über den Pass",
|
||||||
|
"description": "Die Eroberung der Alpen: Über den Pass",
|
||||||
|
"uploader": None,
|
||||||
|
"upload_date": None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -33,16 +47,21 @@ class BRIE(InfoExtractor):
|
|||||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||||
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
videos = [{
|
videos = []
|
||||||
"id": xml_video.get("externalId"),
|
for xml_video in xml.findall("video"):
|
||||||
"title": xml_video.find("title").text,
|
video = {
|
||||||
"formats": self._extract_formats(xml_video.find("assets")),
|
"id": xml_video.get("externalId"),
|
||||||
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
"title": xml_video.find("title").text,
|
||||||
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
"formats": self._extract_formats(xml_video.find("assets")),
|
||||||
"uploader": xml_video.find("author").text,
|
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||||
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||||
"webpage_url": xml_video.find("permalink").text,
|
"webpage_url": xml_video.find("permalink").text
|
||||||
} for xml_video in xml.findall("video")]
|
}
|
||||||
|
if xml_video.find("author").text:
|
||||||
|
video["uploader"] = xml_video.find("author").text
|
||||||
|
if xml_video.find("broadcastDate").text:
|
||||||
|
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
if len(videos) > 1:
|
if len(videos) > 1:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
|
48
youtube_dl/extractor/canal13cl.py
Normal file
48
youtube_dl/extractor/canal13cl.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canal13clIE(InfoExtractor):
    """Information extractor for article pages on 13.cl."""

    # The last non-empty path component is captured as the display id.
    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
        'md5': '4cb1fa38adcad8fea88487a078831755',
        'info_dict': {
            'id': '1403022125',
            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
            'ext': 'mp4',
            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the article at *url*.

        The title and description come from the page's ``twitter:*`` meta
        tags; the media URL and thumbnail come from the ``articuloVideo`` /
        ``articuloImagen`` assignments embedded in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            'twitter:title', webpage, 'title', fatal=True)
        description = self._html_search_meta(
            'twitter:description', webpage, 'description')
        # Kept in its own name so the page URL passed in is not overwritten.
        video_url = self._html_search_regex(
            r'articuloVideo = \"(.*?)\"', webpage, 'url')
        # Use the first run of 7+ digits in the media URL as the id; fall
        # back to the display id when the media URL carries no such number.
        real_id = self._search_regex(
            r'[^0-9]([0-9]{7,})[^0-9]', video_url, 'id', default=display_id)
        # The thumbnail is optional metadata: its absence must not abort
        # the extraction of an otherwise playable video.
        thumbnail = self._html_search_regex(
            r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail', fatal=False)

        return {
            'id': real_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }
|
126
youtube_dl/extractor/ceskatelevize.py
Normal file
126
youtube_dl/extractor/ceskatelevize.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CeskaTelevizeIE(InfoExtractor):
    """Information extractor for ceskatelevize.cz programme pages."""

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
            'info_dict': {
                'id': '213512120230004',
                'ext': 'flv',
                'title': 'První republika: Španělská chřipka',
                'duration': 3107.4,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
            'info_dict': {
                'id': '20138143440',
                'ext': 'flv',
                'title': 'Tsatsiki, maminka a policajt',
                'duration': 6754.1,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
            'info_dict': {
                'id': '14716',
                'ext': 'flv',
                'title': 'První republika: Zpěvačka z Dupárny Bobina',
                'duration': 90,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dictionary for the programme page at *url*.

        The page is fetched to learn the playlist type and episode id, these
        are posted to the ``get-playlist-url`` endpoint, and the playlist XML
        it points to is parsed into one format per non-advertisement
        ``video`` element.

        Raises ExtractorError (expected) when the page carries the
        territory-restriction notice.
        """
        # Normalise to the /ivysilani/ form of the address before matching.
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # urlencode() yields text; the request body has to be bytes on
        # Python 3, and the percent-encoded result is always ASCII.
        post_body = compat_urllib_parse.urlencode(data).encode('ascii')
        req = compat_urllib_request.Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
            data=post_body)

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, video_id)

        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
        req.add_header('Referer', url)

        playlist = self._download_xml(req, video_id)

        formats = []
        # Bound up front so the return below cannot hit an unbound name when
        # the playlist contains nothing but advertisement items.
        duration = None
        for item in playlist.find('smilRoot/body'):
            if 'AD' in item.attrib['id']:
                continue

            base_url = item.attrib['base']
            parsedurl = compat_urllib_parse_urlparse(base_url)
            duration = item.attrib['duration']

            for video in item.findall('video'):
                if video.attrib['label'] == 'AD':
                    continue

                formats.append({
                    'format_id': video.attrib['label'],
                    'url': base_url,
                    'vbr': int(video.attrib['system-bitrate']),
                    'play_path': video.attrib['src'],
                    'app': parsedurl.path[1:] + '?' + parsedurl.query,
                    'rtmp_live': True,
                    'ext': 'flv',
                })

        self._sort_formats(formats)

        return {
            'id': episode_id,
            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
            'duration': float(duration) if duration is not None else None,
            'formats': formats,
        }
|
@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'id': '6902724',
|
'id': '6902724',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Comic-Con Cosplay Catastrophe',
|
'title': 'Comic-Con Cosplay Catastrophe',
|
||||||
'description': 'Fans get creative this year',
|
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
|
'duration': 187,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'id': '3505939',
|
'id': '3505939',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Font Conference',
|
'title': 'Font Conference',
|
||||||
'description': 'This video wasn\'t long enough,',
|
'description': "This video wasn't long enough, so we made it double-spaced.",
|
||||||
'age_limit': 10,
|
'age_limit': 10,
|
||||||
'duration': 179,
|
'duration': 179,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# embedded youtube video
|
# embedded youtube video
|
||||||
{
|
{
|
||||||
'url': 'http://www.collegehumor.com/embed/6950457',
|
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'W5gMp3ZjYg4',
|
'id': 'Z-bao9fg6Yc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'Mark Dice',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'MarkDice',
|
||||||
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140127',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
duration = int_or_none(vdata.get('duration'), 1000)
|
duration = int_or_none(vdata.get('duration'), 1000)
|
||||||
|
like_count = int_or_none(vdata.get('likes'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'like_count': like_count,
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
|
||||||
(video-clips|episodes|cc-studios|video-collections)
|
(video-clips|episodes|cc-studios|video-collections)
|
||||||
/(?P<title>.*)'''
|
/(?P<title>.*)'''
|
||||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||||
|
@ -74,7 +74,7 @@ class InfoExtractor(object):
|
|||||||
"http", "https", "rtsp", "rtmp", "m3u8" or so.
|
"http", "https", "rtsp", "rtmp", "m3u8" or so.
|
||||||
* preference Order number of this format. If this field is
|
* preference Order number of this format. If this field is
|
||||||
present and not None, the formats get sorted
|
present and not None, the formats get sorted
|
||||||
by this field.
|
by this field, regardless of all other values.
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
-2 or smaller for less than default.
|
-2 or smaller for less than default.
|
||||||
* quality Order number of the video quality of this
|
* quality Order number of the video quality of this
|
||||||
@ -88,12 +88,18 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
The following fields are optional:
|
The following fields are optional:
|
||||||
|
|
||||||
|
display_id An alternative identifier for the video, not necessarily
|
||||||
|
unique, but available before title. Typically, id is
|
||||||
|
something like "4234987", title "Dancing naked mole rats",
|
||||||
|
and display_id "dancing-naked-mole-rats"
|
||||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||||
"url") for the varying thumbnails
|
"url") for the varying thumbnails
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: One-line video description.
|
description: One-line video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
|
timestamp: UNIX timestamp of the moment the video became available.
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
|
If not explicitly set, calculated from timestamp.
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
location: Physical location of the video.
|
location: Physical location of the video.
|
||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The subtitle file contents as a dictionary in the format
|
||||||
@ -114,9 +120,6 @@ class InfoExtractor(object):
|
|||||||
_real_extract() methods and define a _VALID_URL regexp.
|
_real_extract() methods and define a _VALID_URL regexp.
|
||||||
Probably, they should also be added to the list of extractors.
|
Probably, they should also be added to the list of extractors.
|
||||||
|
|
||||||
_real_extract() must return a *list* of information dictionaries as
|
|
||||||
described above.
|
|
||||||
|
|
||||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||||
in order to warn the users and skip the tests.
|
in order to warn the users and skip the tests.
|
||||||
"""
|
"""
|
||||||
@ -432,14 +435,14 @@ class InfoExtractor(object):
|
|||||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||||
return self._html_search_regex(regexes, html, name, **kargs)
|
return self._html_search_regex(regexes, html, name, **kargs)
|
||||||
|
|
||||||
def _html_search_meta(self, name, html, display_name=None):
|
def _html_search_meta(self, name, html, display_name=None, fatal=False):
|
||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
r'''(?ix)<meta
|
r'''(?ix)<meta
|
||||||
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
||||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||||
html, display_name, fatal=False)
|
html, display_name, fatal=fatal)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
return self._html_search_meta('dc.creator', html, 'uploader')
|
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||||
|
@ -10,9 +10,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
|
class CSpanIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
|
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
|
||||||
IE_DESC = 'C-SPAN'
|
IE_DESC = 'C-SPAN'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -22,13 +22,24 @@ class CSpanIE(InfoExtractor):
|
|||||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||||
},
|
},
|
||||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||||
|
# For whatever reason, the served video alternates between
|
||||||
|
# two different ones
|
||||||
|
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '340723',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'International Health Care Models',
|
||||||
|
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_id = mobj.group('id')
|
page_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
|
video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
[
|
[
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
int_or_none,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if video_url is not None:
|
if video_url is not None:
|
||||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||||
if m_size is not None:
|
if m_size is not None:
|
||||||
width, height = m_size.group(1), m_size.group(2)
|
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
||||||
else:
|
else:
|
||||||
width, height = None, None
|
width, height = None, None
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@ -1,25 +1,28 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
determine_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = u'daum.net'
|
IE_NAME = 'daum.net'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||||
u'file': u'52554690.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '52554690',
|
||||||
u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
|
'ext': 'mp4',
|
||||||
u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
|
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
u'upload_date': u'20130831',
|
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
u'duration': 3868,
|
'upload_date': '20130831',
|
||||||
|
'duration': 3868,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,14 +33,14 @@ class DaumIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(canonical_url, video_id)
|
webpage = self._download_webpage(canonical_url, video_id)
|
||||||
full_id = self._search_regex(
|
full_id = self._search_regex(
|
||||||
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
||||||
webpage, u'full id')
|
webpage, 'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info = self._download_xml(
|
info = self._download_xml(
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
u'Downloading video info')
|
'Downloading video info')
|
||||||
urls = self._download_xml(
|
urls = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, 'Downloading video formats info')
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@ -53,7 +56,6 @@ class DaumIE(InfoExtractor):
|
|||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'ext': determine_ext(format_url),
|
|
||||||
'format_id': profile,
|
'format_id': profile,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -1,60 +0,0 @@
|
|||||||
import re
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
compat_http_client,
|
|
||||||
compat_str,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_request,
|
|
||||||
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Return a one-element list describing the file behind *url*.

        The page is requested in the English locale with the 'Free download'
        form field set, and the download link is read from the resulting
        ``<form action=...>``.

        Raises ExtractorError when the page cannot be fetched, when the site
        reports a restriction ("Attention ..."), or when no download link is
        found.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = {'gateway_result': '1'}
        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # Try to figure out reason of the error.
            reason = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if reason is None:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
            # Collapse runs of whitespace so the message fits on one line.
            restriction_message = re.sub(r'\s+', ' ', reason.group(1)).strip()
            raise ExtractorError(u'%s' % restriction_message)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        # NOTE(review): the .decode() calls assume byte strings (Python 2
        # str); confirm before running this extractor on Python 3.
        return [{
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension.decode('utf-8'),
        }]
|
|
43
youtube_dl/extractor/engadget.py
Normal file
43
youtube_dl/extractor/engadget.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .fivemin import FiveMinIE
|
||||||
|
from ..utils import (
|
||||||
|
url_basename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EngadgetIE(InfoExtractor):
    """Information extractor for engadget.com video and article pages."""

    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'''(?x)https?://www\.engadget\.com/
        (?:video/5min/(?P<id>\d+)|
            [\d/]+/.*?)
        '''

    _TEST = {
        'url': 'http://www.engadget.com/video/5min/518153925/',
        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
        'info_dict': {
            'id': '518153925',
            'ext': 'mp4',
            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
        },
        'add_ie': ['FiveMin'],
    }

    def _real_extract(self, url):
        """Delegate *url* to the FiveMin extractor.

        A ``video/5min/<id>`` URL yields the single result built by
        ``FiveMinIE._build_result``.  Any other matching URL is downloaded
        and every ``playList=<digits>`` value found in an ``<iframe>`` tag
        becomes one entry of a playlist result titled after the URL's
        basename.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        if video_id is not None:
            return FiveMinIE._build_result(video_id)

        title = url_basename(url)
        webpage = self._download_webpage(url, title)
        playlist_ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
        return {
            '_type': 'playlist',
            'title': title,
            'entries': [
                FiveMinIE._build_result(playlist_id)
                for playlist_id in playlist_ids
            ],
        }
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
@ -9,16 +11,15 @@ from ..utils import (
|
|||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
urlencode_postdata,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
"""Information Extractor for Facebook"""
|
|
||||||
|
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://)?(?:\w+\.)?facebook\.com/
|
https?://(?:\w+\.)?facebook\.com/
|
||||||
(?:[^#?]*\#!/)?
|
(?:[^#?]*\#!/)?
|
||||||
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
||||||
(?:v|video_id)=(?P<id>[0-9]+)
|
(?:v|video_id)=(?P<id>[0-9]+)
|
||||||
@ -26,21 +27,18 @@ class FacebookIE(InfoExtractor):
|
|||||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = u'facebook'
|
IE_NAME = 'facebook'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
|
'url': 'https://www.facebook.com/photo.php?v=120708114770723',
|
||||||
u'file': u'120708114770723.mp4',
|
'md5': '48975a41ccc4b7a581abd68651c1a5a8',
|
||||||
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '120708114770723',
|
||||||
u"duration": 279,
|
'ext': 'mp4',
|
||||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
'duration': 279,
|
||||||
|
'title': 'PEOPLE ARE AWESOME 2013',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def report_login(self):
|
|
||||||
"""Report attempt to log in."""
|
|
||||||
self.to_screen(u'Logging in')
|
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(useremail, password) = self._get_login_info()
|
(useremail, password) = self._get_login_info()
|
||||||
if useremail is None:
|
if useremail is None:
|
||||||
@ -48,11 +46,13 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||||
self.report_login()
|
login_page = self._download_webpage(login_page_req, None,
|
||||||
login_page = self._download_webpage(login_page_req, None, note=False,
|
note='Downloading login page',
|
||||||
errnote=u'Unable to download login page')
|
errnote='Unable to download login page')
|
||||||
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
|
lsd = self._search_regex(
|
||||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
|
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||||
|
login_page, 'lsd')
|
||||||
|
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'email': useremail,
|
'email': useremail,
|
||||||
@ -65,27 +65,29 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timezone': '-60',
|
'timezone': '-60',
|
||||||
'trynum': '1',
|
'trynum': '1',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
try:
|
||||||
login_results = compat_urllib_request.urlopen(request).read()
|
login_results = self._download_webpage(request, None,
|
||||||
|
note='Logging in', errnote='unable to fetch login page')
|
||||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||||
return
|
return
|
||||||
|
|
||||||
check_form = {
|
check_form = {
|
||||||
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
|
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
|
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||||
'name_action_selected': 'dont_save',
|
'name_action_selected': 'dont_save',
|
||||||
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
|
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||||
}
|
}
|
||||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
check_response = compat_urllib_request.urlopen(check_req).read()
|
check_response = self._download_webpage(check_req, None,
|
||||||
|
note='Confirming login')
|
||||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||||
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
|
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
self._downloader.report_warning('unable to log in: %s' % compat_str(err))
|
||||||
return
|
return
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@ -93,8 +95,6 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||||
@ -107,10 +107,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||||
if m_msg is not None:
|
if m_msg is not None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||||
expected=True)
|
expected=True)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
data = dict(json.loads(m.group(1)))
|
data = dict(json.loads(m.group(1)))
|
||||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||||
params = json.loads(params_raw)
|
params = json.loads(params_raw)
|
||||||
@ -119,19 +119,15 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_url:
|
if not video_url:
|
||||||
video_url = video_data['sd_src']
|
video_url = video_data['sd_src']
|
||||||
if not video_url:
|
if not video_url:
|
||||||
raise ExtractorError(u'Cannot find video URL')
|
raise ExtractorError('Cannot find video URL')
|
||||||
video_duration = int(video_data['video_duration'])
|
|
||||||
thumbnail = video_data['thumbnail_src']
|
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
|
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'duration': int(video_data['video_duration']),
|
||||||
'duration': video_duration,
|
'thumbnail': video_data['thumbnail_src'],
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
}
|
||||||
return [info]
|
|
||||||
|
56
youtube_dl/extractor/fivemin.py
Normal file
56
youtube_dl/extractor/fivemin.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FiveMinIE(InfoExtractor):
    """Extractor for 5min.com videos.

    Accepts either a ``PlayerSeed.js`` embed URL carrying a ``playList``
    query parameter or the internal ``5min:<id>`` pseudo-URL.
    """
    IE_NAME = '5min'
    _VALID_URL = r'''(?x)
        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
            5min:)
        (?P<id>\d+)
        '''

    _TEST = {
        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
        'md5': '4f7b0b79bf1a470e5004f7112385941d',
        'info_dict': {
            'id': '518013791',
            'ext': 'mp4',
            'title': 'iPad Mini with Retina Display Review',
        },
    }

    @classmethod
    def _build_result(cls, video_id):
        """Return a url_result for ``video_id`` routed back to this extractor."""
        internal_url = '5min:%s' % video_id
        return cls.url_result(internal_url, cls.ie_key())

    def _real_extract(self, url):
        """Fetch the video metadata and build one format per known rendition."""
        video_id = re.match(self._VALID_URL, url).group('id')
        api_url = (
            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
            'playlist=%s&url=https' % video_id)
        info = self._download_json(api_url, video_id)['binding'][0]

        # The media path is derived from the id without its last two digits,
        # plus one; its last three characters form the first path component.
        second_id = compat_str(int(video_id[:-2]) + 1)
        renditions = info['Renditions']
        known_qualities = [(1, 320), (2, 480), (4, 720), (8, 1080)]

        # Only advertise the qualities that the metadata actually lists.
        formats = [
            {
                'format_id': compat_str(quality),
                'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality),
                'height': height,
            }
            for quality, height in known_qualities
            if any(r['ID'] == quality for r in renditions)
        ]

        return {
            'id': video_id,
            'title': info['Title'],
            'formats': formats,
        }
|
@ -8,8 +8,8 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
clean_html,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import clean_html
|
|
||||||
|
|
||||||
|
|
||||||
class FourTubeIE(InfoExtractor):
|
class FourTubeIE(InfoExtractor):
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class FunnyOrDieIE(InfoExtractor):
|
class FunnyOrDieIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
|
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||||
'file': '0732f586d7.mp4',
|
'file': '0732f586d7.mp4',
|
||||||
@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||||
webpage, 'video URL', flags=re.DOTALL)
|
webpage, 'video URL', flags=re.DOTALL)
|
||||||
|
|
||||||
|
if mobj.group('type') == 'embed':
|
||||||
|
post_json = self._search_regex(
|
||||||
|
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||||
|
post = json.loads(post_json)
|
||||||
|
title = post['name']
|
||||||
|
description = post.get('description')
|
||||||
|
thumbnail = post.get('picture')
|
||||||
|
else:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': self._og_search_title(webpage),
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -6,13 +8,14 @@ from .common import InfoExtractor
|
|||||||
class GamekingsIE(InfoExtractor):
|
class GamekingsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||||
u'file': u'20130811.mp4',
|
|
||||||
# MD5 is flaky, seems to change regularly
|
# MD5 is flaky, seems to change regularly
|
||||||
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
|
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
|
'id': '20130811',
|
||||||
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
|
'ext': 'mp4',
|
||||||
|
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||||
|
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
134
youtube_dl/extractor/gdcvault.py
Normal file
134
youtube_dl/extractor/gdcvault.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
class GDCVaultIE(InfoExtractor):
    """Extractor for talks on gdcvault.com (``/play/<id>/<name>``).

    The play page embeds a player iframe whose URL names an XML description
    file.  That XML either lists HTTP MP4 renditions or a pair of RTMP
    streams (slide deck and speaker).
    """
    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
        {
            'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
            'md5': '7ce8388f544c88b7ac11c7ab1b593704',
            'info_dict': {
                'id': '1019721',
                'ext': 'mp4',
                'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
            }
        },
        {
            'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
            'info_dict': {
                'id': '1015683',
                'ext': 'flv',
                'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
            },
            'params': {
                'skip_download': True,  # Requires rtmpdump
            }
        },
    ]

    def _parse_mp4(self, xml_description):
        """Build the MP4 format list from the XML description.

        Returns None when the document has no ``metadata/mp4video`` element,
        which the caller treats as "use the RTMP/FLV layout instead".
        """
        mp4_video = xml_description.find('./metadata/mp4video')
        if mp4_video is None:
            return None

        # Every rendition lives under the same scheme://host/ root.
        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
        video_root = mobj.group('root')

        video_formats = []
        for rendition in xml_description.findall('./metadata/MBRVideos/MBRVideo'):
            # streamName looks like "mp4:<path>"; only the path part is wanted.
            mobj = re.match(r'mp4\:(?P<path>.*)', rendition.find('streamName').text)
            video_formats.append({
                'url': video_root + mobj.group('path'),
                'vbr': int(rendition.find('bitrate').text),
            })
        return video_formats

    def _parse_flv(self, xml_description):
        """Build the two RTMP formats (slides and speaker) from the XML description."""
        akamai_host = xml_description.find('./metadata/akamaiHost').text
        slide_video_path = xml_description.find('./metadata/slideVideo').text
        speaker_video_path = xml_description.find('./metadata/speakerVideo').text

        # The speaker video is ranked above the slide deck (-1 > -2).
        return [
            {
                'url': 'rtmp://' + akamai_host + '/' + slide_video_path,
                'format_note': 'slide deck video',
                'quality': -2,
                'preference': -2,
                'format_id': 'slides',
            },
            {
                'url': 'rtmp://' + akamai_host + '/' + speaker_video_path,
                'format_note': 'speaker video',
                'quality': -1,
                'preference': -1,
                'format_id': 'speaker',
            },
        ]

    def _login(self, webpage_url, video_id):
        """Log in, download ``webpage_url`` while authenticated, then log out.

        Returns the authenticated page HTML, or None (after a warning) when
        no username/password is configured.
        """
        (username, password) = self._get_login_info()
        if username is None or password is None:
            self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
            return None

        mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
        root_url = mobj.group('root_url')
        login_url = root_url + 'api/login.php'
        logout_url = root_url + 'logout'

        login_form = {
            'email': username,
            'password': password,
        }

        # The POST body must be bytes on Python 3; urlencode() output is
        # pure ASCII, so this encoding is lossless on both Python 2 and 3.
        post_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(login_url, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(request, video_id, 'Logging in')
        start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
        self._download_webpage(logout_url, video_id, 'Logging out')

        return start_page

    def _real_extract(self, url):
        """Resolve the play page to its XML description and return the info dict."""
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

        player_re = r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>'
        xml_root = self._html_search_regex(player_re, start_page, 'xml root', None, False)

        if xml_root is None:
            # Probably need to authenticate
            login_page = self._login(webpage_url, video_id)
            if login_page is None:
                # Keep the unauthenticated page: replacing it with None would
                # make the searches below fail with a TypeError.
                self.report_warning('Could not login.')
            else:
                start_page = login_page
            # Fatal search: when the player iframe is still missing this
            # reports an extraction error instead of crashing later on
            # ``None + 'xml/'``.
            xml_root = self._html_search_regex(player_re, start_page, 'xml root')

        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

        xml_description_url = xml_root + 'xml/' + xml_name
        xml_description = self._download_xml(xml_description_url, video_id)

        video_title = xml_description.find('./metadata/title').text
        video_formats = self._parse_mp4(xml_description)
        if video_formats is None:
            video_formats = self._parse_flv(xml_description)

        return {
            'id': video_id,
            'title': video_title,
            'formats': video_formats,
        }
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
@ -17,6 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
parse_xml,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -24,6 +24,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
|
from .rutv import RUTVIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -101,6 +102,20 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': '2cc213299525360.mov', # that's what we get
|
'title': '2cc213299525360.mov', # that's what we get
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# second style of embedded ooyala videos
|
||||||
|
{
|
||||||
|
'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Behind-the-scenes: Financial Review Sunday ',
|
||||||
|
'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@ -116,7 +131,72 @@ class GenericIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': False,
|
'skip_download': False,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
# embed.ly video
|
||||||
|
{
|
||||||
|
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9ODmcdjQcHQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
|
||||||
|
'upload_date': '20140225',
|
||||||
|
'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
|
||||||
|
'uploader': 'Tested',
|
||||||
|
'uploader_id': 'testedcom',
|
||||||
|
},
|
||||||
|
# No need to test YoutubeIE here
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# funnyordie embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
||||||
|
'md5': '7cf780be104d40fea7bae52eed4a470e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18e820ec3f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||||
|
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# RUTV embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '776940',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Охотское море стало целиком российским',
|
||||||
|
'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# Embedded TED video
|
||||||
|
{
|
||||||
|
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||||
|
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '981',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'My web playroom',
|
||||||
|
'uploader': 'Ze Frank',
|
||||||
|
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# nowvideo embed hidden behind percent encoding
|
||||||
|
{
|
||||||
|
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
||||||
|
'md5': '2baf4ddd70f697d94b1c18cf796d5107',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '06e53103ca9aa',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
|
||||||
|
'description': 'No description',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@ -142,9 +222,14 @@ class GenericIE(InfoExtractor):
|
|||||||
newurl = newurl.replace(' ', '%20')
|
newurl = newurl.replace(' ', '%20')
|
||||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
newheaders = dict((k,v) for k,v in req.headers.items()
|
||||||
if k.lower() not in ("content-length", "content-type"))
|
if k.lower() not in ("content-length", "content-type"))
|
||||||
|
try:
|
||||||
|
# This function was deprecated in python 3.3 and removed in 3.4
|
||||||
|
origin_req_host = req.get_origin_req_host()
|
||||||
|
except AttributeError:
|
||||||
|
origin_req_host = req.origin_req_host
|
||||||
return HEADRequest(newurl,
|
return HEADRequest(newurl,
|
||||||
headers=newheaders,
|
headers=newheaders,
|
||||||
origin_req_host=req.get_origin_req_host(),
|
origin_req_host=origin_req_host,
|
||||||
unverifiable=True)
|
unverifiable=True)
|
||||||
else:
|
else:
|
||||||
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
|
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
|
||||||
@ -211,7 +296,7 @@ class GenericIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
assert ':' in default_search
|
assert ':' in default_search
|
||||||
return self.url_result(default_search + url)
|
return self.url_result(default_search + url)
|
||||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||||
|
|
||||||
self.to_screen('%s: Requesting header' % video_id)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
|
|
||||||
@ -257,12 +342,17 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Is it an RSS feed?
|
# Is it an RSS feed?
|
||||||
try:
|
try:
|
||||||
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
|
doc = parse_xml(webpage)
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
return self._extract_rss(url, video_id, doc)
|
return self._extract_rss(url, video_id, doc)
|
||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Sometimes embedded video player is hidden behind percent encoding
|
||||||
|
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||||
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
|
webpage = compat_urllib_parse.unquote(webpage)
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
# have to take into account all the variations like
|
# have to take into account all the variations like
|
||||||
# Video Title - Site Name
|
# Video Title - Site Name
|
||||||
@ -296,9 +386,9 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded (iframe) Vimeo player
|
# Look for embedded (iframe) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
player_url = unescapeHTML(mobj.group(1))
|
player_url = unescapeHTML(mobj.group('url'))
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
@ -364,9 +454,10 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
|
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
|
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(mobj.group(1))
|
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||||
|
|
||||||
# Look for Aparat videos
|
# Look for Aparat videos
|
||||||
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||||
@ -396,12 +487,44 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Facebook')
|
return self.url_result(mobj.group('url'), 'Facebook')
|
||||||
|
|
||||||
|
# Look for embedded VK player
|
||||||
|
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'VK')
|
||||||
|
|
||||||
# Look for embedded Huffington Post player
|
# Look for embedded Huffington Post player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||||
|
|
||||||
|
# Look for embed.ly
|
||||||
|
mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||||
|
|
||||||
|
# Look for funnyordie embed
|
||||||
|
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||||
|
if matches:
|
||||||
|
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
||||||
|
for eurl in matches]
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
|
# Look for embedded RUTV player
|
||||||
|
rutv_url = RUTVIE._extract_url(webpage)
|
||||||
|
if rutv_url:
|
||||||
|
return self.url_result(rutv_url, 'RUTV')
|
||||||
|
|
||||||
|
# Look for embedded TED player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'TED')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
@ -413,6 +536,7 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Broaden the search a little bit: JWPlayer JS loader
|
# Broaden the search a little bit: JWPlayer JS loader
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||||
|
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||||
|
@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor):
|
|||||||
'url': mobj.group(1)
|
'url': mobj.group(1)
|
||||||
})
|
})
|
||||||
|
|
||||||
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
|
if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
|
||||||
res['entries'] = entries[:n]
|
res['entries'] = entries[:n]
|
||||||
return res
|
return res
|
||||||
|
@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
@ -37,3 +40,68 @@ class InstagramIE(InfoExtractor):
|
|||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'description': desc,
|
'description': desc,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class InstagramUserIE(InfoExtractor):
    """Extractor that turns an Instagram user profile into a playlist of its videos."""
    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'

    def _real_extract(self, url):
        """Page through ``/<user>/media`` and collect every item of type ``video``.

        Paging continues, using the id of the last item as ``max_id``, until
        a page comes back with no items.
        """
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('username')

        entries = []
        page_count = 0
        media_url = 'http://instagram.com/%s/media' % uploader_id
        while True:
            page = self._download_json(
                media_url, uploader_id,
                note='Downloading page %d ' % (page_count + 1),
            )
            page_count += 1
            items = page['items']

            for it in items:
                if it.get('type') != 'video':
                    continue

                # ``x.get(key, {})`` only covers a *missing* key; a key that
                # is present with a null value would yield None and make the
                # chained ``.get`` raise AttributeError.  ``or {}`` covers
                # both cases.
                likes = it.get('likes') or {}
                user = it.get('user') or {}
                images = it.get('images') or {}
                caption = it.get('caption') or {}

                formats = [{
                    'format_id': k,
                    'height': v.get('height'),
                    'width': v.get('width'),
                    'url': v['url'],
                } for k, v in it['videos'].items()]
                self._sort_formats(formats)

                entries.append({
                    'id': it['id'],
                    # Fall back to the item id when there is no caption text.
                    'title': caption.get('text', it['id']),
                    'formats': formats,
                    'thumbnail': (images.get('thumbnail') or {}).get('url'),
                    'webpage_url': it.get('link'),
                    'uploader': user.get('full_name'),
                    'uploader_id': user.get('username'),
                    'like_count': int_or_none(likes.get('count')),
                    'timestamp': int_or_none(it.get('created_time')),
                })

            if not items:
                break
            max_id = items[-1]['id']
            media_url = (
                'http://instagram.com/%s/media?max_id=%s' % (
                    uploader_id, max_id))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': uploader_id,
            'title': uploader_id,
        }
|
||||||
|
@ -6,7 +6,10 @@ from random import random
|
|||||||
from math import floor
|
from math import floor
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import compat_urllib_request
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
@ -36,6 +39,7 @@ class IPrimaIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires rtmpdump
|
'skip_download': True, # requires rtmpdump
|
||||||
},
|
},
|
||||||
|
'skip': 'Do not have permission to access this page',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -44,6 +48,10 @@ class IPrimaIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
||||||
|
|
||||||
player_url = (
|
player_url = (
|
||||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||||
(floor(random()*1073741824), floor(random()*1073741824))
|
(floor(random()*1073741824), floor(random()*1073741824))
|
||||||
|
@ -1,56 +1,61 @@
|
|||||||
# coding: utf-8
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
RegexNotFoundError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class JukeboxIE(InfoExtractor):
|
class JukeboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
_TEST = {
|
||||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
|
||||||
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
|
'info_dict': {
|
||||||
|
'id': 'r303r',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Kosheen-En Vivo Pride',
|
||||||
|
'uploader': 'Kosheen',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
html = self._download_webpage(url, video_id)
|
html = self._download_webpage(url, video_id)
|
||||||
|
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
||||||
mobj = re.search(self._IFRAME, html)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Cannot extract iframe url')
|
|
||||||
iframe_url = unescapeHTML(mobj.group('iframe'))
|
|
||||||
|
|
||||||
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
|
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
|
||||||
mobj = re.search(r'class="jkb_waiting"', iframe_html)
|
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
|
||||||
if mobj is not None:
|
raise ExtractorError('Video is not available(in your country?)!')
|
||||||
raise ExtractorError(u'Video is not available(in your country?)!')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
mobj = re.search(self._VIDEO_URL, iframe_html)
|
try:
|
||||||
if mobj is None:
|
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
||||||
mobj = re.search(self._IS_YOUTUBE, iframe_html)
|
iframe_html, 'video url')
|
||||||
if mobj is None:
|
video_url = unescapeHTML(video_url).replace('\/', '/')
|
||||||
raise ExtractorError(u'Cannot extract video url')
|
except RegexNotFoundError:
|
||||||
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
|
youtube_url = self._search_regex(
|
||||||
self.to_screen(u'Youtube video detected')
|
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
|
||||||
return self.url_result(youtube_url,ie='Youtube')
|
iframe_html, 'youtube url')
|
||||||
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
|
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
|
||||||
video_ext = unescapeHTML(mobj.group('video_ext'))
|
self.to_screen('Youtube video detected')
|
||||||
|
return self.url_result(youtube_url, ie='Youtube')
|
||||||
|
|
||||||
mobj = re.search(self._TITLE, html)
|
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
||||||
if mobj is None:
|
html, 'title')
|
||||||
raise ExtractorError(u'Cannot extract title')
|
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
||||||
title = unescapeHTML(mobj.group('title'))
|
html, 'artist')
|
||||||
artist = unescapeHTML(mobj.group('artist'))
|
|
||||||
|
|
||||||
return [{'id': video_id,
|
return {
|
||||||
'url': video_url,
|
'id': video_id,
|
||||||
'title': artist + '-' + title,
|
'url': video_url,
|
||||||
'ext': video_ext
|
'title': artist + '-' + title,
|
||||||
}]
|
'uploader': artist,
|
||||||
|
}
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class KontrTubeIE(InfoExtractor):
|
class KontrTubeIE(InfoExtractor):
|
||||||
@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
|
|
||||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||||
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
|
title = self._html_search_regex(
|
||||||
'video title')
|
r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
|
||||||
description = self._html_search_meta('description', webpage, 'video description')
|
description = self._html_search_meta('description', webpage, 'video description')
|
||||||
|
|
||||||
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
mobj = re.search(
|
||||||
webpage)
|
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
|
||||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||||
|
|
||||||
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
|
view_count = self._html_search_regex(
|
||||||
'view count', fatal=False)
|
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
|
||||||
view_count = int(view_count) if view_count is not None else None
|
|
||||||
|
|
||||||
comment_count = None
|
comment_count = None
|
||||||
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
|
comment_str = self._html_search_regex(
|
||||||
fatal=False)
|
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
|
||||||
if comment_str.startswith('комментариев нет'):
|
if comment_str.startswith('комментариев нет'):
|
||||||
comment_count = 0
|
comment_count = 0
|
||||||
else:
|
else:
|
||||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||||
if mobj:
|
if mobj:
|
||||||
comment_count = int(mobj.group('total'))
|
comment_count = mobj.group('total')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': int_or_none(view_count),
|
||||||
'comment_count': comment_count,
|
'comment_count': int_or_none(comment_count),
|
||||||
}
|
}
|
@ -6,7 +6,8 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate
|
unified_strdate,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -34,11 +35,9 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
if not videos:
|
||||||
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
thumbnail = self._html_search_regex(
|
|
||||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||||
@ -57,13 +56,19 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
if upload_date is not None:
|
if upload_date is not None:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
return {
|
def make_entry(video_id, media, video_number=None):
|
||||||
'id': video_id,
|
return {
|
||||||
'url': video_url,
|
'id': video_id,
|
||||||
'thumbnail': thumbnail,
|
'url': media[1],
|
||||||
'title': title,
|
'thumbnail': media[0],
|
||||||
'description': description,
|
'title': title if video_number is None else '%s-video%s' % (title, video_number),
|
||||||
'view_count': int_or_none(view_count),
|
'description': description,
|
||||||
'comment_count': int_or_none(comment_count),
|
'view_count': int_or_none(view_count),
|
||||||
'upload_date': upload_date,
|
'comment_count': int_or_none(comment_count),
|
||||||
}
|
'upload_date': upload_date,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(videos) == 1:
|
||||||
|
return make_entry(video_id, videos[0])
|
||||||
|
else:
|
||||||
|
return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
|
@ -8,7 +8,9 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
ExtractorError
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||||
_NETRC_MACHINE = 'lynda'
|
_NETRC_MACHINE = 'lynda'
|
||||||
|
|
||||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
|
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||||
|
|
||||||
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||||
'file': '114408.mp4',
|
|
||||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '114408',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Using the exercise files',
|
'title': 'Using the exercise files',
|
||||||
'duration': 68
|
'duration': 68
|
||||||
}
|
}
|
||||||
@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
||||||
video_id, 'Downloading video JSON')
|
'Downloading video JSON')
|
||||||
video_json = json.loads(page)
|
video_json = json.loads(page)
|
||||||
|
|
||||||
if 'Status' in video_json:
|
if 'Status' in video_json:
|
||||||
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
|
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
|
||||||
|
|
||||||
if video_json['HasAccess'] is False:
|
if video_json['HasAccess'] is False:
|
||||||
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
raise ExtractorError(
|
||||||
|
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||||
|
|
||||||
video_id = video_json['ID']
|
video_id = compat_str(video_json['ID'])
|
||||||
duration = video_json['DurationInSeconds']
|
duration = video_json['DurationInSeconds']
|
||||||
title = video_json['Title']
|
title = video_json['Title']
|
||||||
|
|
||||||
formats = [{'url': fmt['Url'],
|
formats = []
|
||||||
|
|
||||||
|
fmts = video_json.get('Formats')
|
||||||
|
if fmts:
|
||||||
|
formats.extend([
|
||||||
|
{
|
||||||
|
'url': fmt['Url'],
|
||||||
'ext': fmt['Extension'],
|
'ext': fmt['Extension'],
|
||||||
'width': fmt['Width'],
|
'width': fmt['Width'],
|
||||||
'height': fmt['Height'],
|
'height': fmt['Height'],
|
||||||
'filesize': fmt['FileSize'],
|
'filesize': fmt['FileSize'],
|
||||||
'format_id': str(fmt['Resolution'])
|
'format_id': str(fmt['Resolution'])
|
||||||
} for fmt in video_json['Formats']]
|
} for fmt in fmts])
|
||||||
|
|
||||||
|
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||||
|
if prioritized_streams:
|
||||||
|
formats.extend([
|
||||||
|
{
|
||||||
|
'url': video_url,
|
||||||
|
'width': int_or_none(format_id),
|
||||||
|
'format_id': format_id,
|
||||||
|
} for format_id, video_url in prioritized_streams['0'].items()
|
||||||
|
])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
'stayPut': 'false'
|
'stayPut': 'false'
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
# Not (yet) logged in
|
# Not (yet) logged in
|
||||||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||||
@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
'stayPut': 'false',
|
'stayPut': 'false',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||||
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
|
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
|
||||||
|
|
||||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||||
sub = self._download_webpage(url, None, note=False)
|
sub = self._download_webpage(url, None, False)
|
||||||
sub_json = json.loads(sub)
|
sub_json = json.loads(sub)
|
||||||
return {'en': url} if len(sub_json) > 0 else {}
|
return {'en': url} if len(sub_json) > 0 else {}
|
||||||
|
|
||||||
@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
|
|||||||
videos = []
|
videos = []
|
||||||
(username, _) = self._get_login_info()
|
(username, _) = self._get_login_info()
|
||||||
|
|
||||||
|
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||||
|
# by single video API anymore
|
||||||
|
|
||||||
for chapter in course_json['Chapters']:
|
for chapter in course_json['Chapters']:
|
||||||
for video in chapter['Videos']:
|
for video in chapter['Videos']:
|
||||||
if username is None and video['HasAccess'] is False:
|
if username is None and video['HasAccess'] is False:
|
||||||
|
66
youtube_dl/extractor/mailru.py
Normal file
66
youtube_dl/extractor/mailru.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MailRuIE(InfoExtractor):
|
||||||
|
IE_NAME = 'mailru'
|
||||||
|
IE_DESC = 'Видео@Mail.Ru'
|
||||||
|
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||||
|
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '46301138',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||||
|
'upload_date': '20140224',
|
||||||
|
'uploader': 'sonypicturesrus',
|
||||||
|
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||||
|
'duration': 184,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
video_data = self._download_json(
|
||||||
|
'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
author = video_data['author']
|
||||||
|
uploader = author['name']
|
||||||
|
uploader_id = author['id']
|
||||||
|
|
||||||
|
movie = video_data['movie']
|
||||||
|
content_id = str(movie['contentId'])
|
||||||
|
title = movie['title']
|
||||||
|
thumbnail = movie['poster']
|
||||||
|
duration = movie['duration']
|
||||||
|
|
||||||
|
upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
|
||||||
|
view_count = video_data['views_count']
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{
|
||||||
|
'url': video['url'],
|
||||||
|
'format_id': video['name'],
|
||||||
|
} for video in video_data['videos']
|
||||||
|
]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': content_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,104 +11,103 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
class MetacafeIE(InfoExtractor):
|
|
||||||
"""Information Extractor for metacafe.com."""
|
|
||||||
|
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
class MetacafeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
||||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||||
IE_NAME = u'metacafe'
|
IE_NAME = 'metacafe'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Youtube video
|
# Youtube video
|
||||||
{
|
{
|
||||||
u"add_ie": ["Youtube"],
|
'add_ie': ['Youtube'],
|
||||||
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
'url': 'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/',
|
||||||
u"file": u"_aUehQsCQtM.mp4",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '_aUehQsCQtM',
|
||||||
u"upload_date": u"20090102",
|
'ext': 'mp4',
|
||||||
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
|
'upload_date': '20090102',
|
||||||
u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
|
'title': 'The Electric Company | "Short I" | PBS KIDS GO!',
|
||||||
u"uploader": u"PBS",
|
'description': 'md5:2439a8ef6d5a70e380c22f5ad323e5a8',
|
||||||
u"uploader_id": u"PBS"
|
'uploader': 'PBS',
|
||||||
}
|
'uploader_id': 'PBS'
|
||||||
},
|
}
|
||||||
# Normal metacafe video
|
|
||||||
{
|
|
||||||
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
|
|
||||||
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
|
|
||||||
u'info_dict': {
|
|
||||||
u'id': u'11121940',
|
|
||||||
u'ext': u'mp4',
|
|
||||||
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
|
|
||||||
u'uploader': u'ign',
|
|
||||||
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
|
||||||
},
|
},
|
||||||
},
|
# Normal metacafe video
|
||||||
# AnyClip video
|
{
|
||||||
{
|
'url': 'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
|
||||||
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
'md5': '6e0bca200eaad2552e6915ed6fd4d9ad',
|
||||||
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '11121940',
|
||||||
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
'ext': 'mp4',
|
||||||
u"uploader": u"anyclip",
|
'title': 'News: Stuff You Won\'t Do with Your PlayStation 4',
|
||||||
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
|
'uploader': 'ign',
|
||||||
|
'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
# AnyClip video
|
||||||
# age-restricted video
|
{
|
||||||
{
|
'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
|
||||||
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
|
'info_dict': {
|
||||||
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
|
'id': 'an-dVVXnuY7Jh77J',
|
||||||
u'info_dict': {
|
'ext': 'mp4',
|
||||||
u'id': u'5186653',
|
'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
|
||||||
u'ext': u'mp4',
|
'uploader': 'anyclip',
|
||||||
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
|
'description': 'md5:38c711dd98f5bb87acf973d573442e67',
|
||||||
u'uploader': u'Dwayne Pipe',
|
},
|
||||||
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
|
|
||||||
u'age_limit': 18,
|
|
||||||
},
|
},
|
||||||
},
|
# age-restricted video
|
||||||
# cbs video
|
{
|
||||||
{
|
'url': 'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
|
||||||
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
|
'md5': '98dde7c1a35d02178e8ab7560fe8bd09',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'0rOxMBabDXN6',
|
'id': '5186653',
|
||||||
u'ext': u'flv',
|
'ext': 'mp4',
|
||||||
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
|
'title': 'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
|
||||||
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
|
'uploader': 'Dwayne Pipe',
|
||||||
u'duration': 129,
|
'description': 'md5:950bf4c581e2c059911fa3ffbe377e4b',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
u'params': {
|
# cbs video
|
||||||
# rtmp download
|
{
|
||||||
u'skip_download': True,
|
'url': 'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8VD4r_Zws8VP',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Open: This is Face the Nation, February 9',
|
||||||
|
'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
|
||||||
|
'duration': 96,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
"""Report disclaimer retrieval."""
|
self.to_screen('Retrieving disclaimer')
|
||||||
self.to_screen(u'Retrieving disclaimer')
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
# Retrieve disclaimer
|
# Retrieve disclaimer
|
||||||
self.report_disclaimer()
|
self.report_disclaimer()
|
||||||
self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
|
self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
|
||||||
|
|
||||||
# Confirm age
|
# Confirm age
|
||||||
disclaimer_form = {
|
disclaimer_form = {
|
||||||
'filters': '0',
|
'filters': '0',
|
||||||
'submit': "Continue - I'm over 18",
|
'submit': "Continue - I'm over 18",
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
self.report_age_confirmation()
|
self.report_age_confirmation()
|
||||||
self._download_webpage(request, None, False, u'Unable to confirm age')
|
self._download_webpage(request, None, False, 'Unable to confirm age')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
|
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
@ -153,23 +154,24 @@ class MetacafeIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
raise ExtractorError('Unable to extract media URL')
|
||||||
vardict = compat_parse_qs(mobj.group(1))
|
vardict = compat_parse_qs(mobj.group(1))
|
||||||
if 'mediaData' not in vardict:
|
if 'mediaData' not in vardict:
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
raise ExtractorError('Unable to extract media URL')
|
||||||
mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
|
mobj = re.search(
|
||||||
|
r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
raise ExtractorError('Unable to extract media URL')
|
||||||
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
||||||
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
||||||
video_ext = determine_ext(video_url)
|
video_ext = determine_ext(video_url)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
|
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||||
webpage, u'uploader nickname', fatal=False)
|
webpage, 'uploader nickname', fatal=False)
|
||||||
|
|
||||||
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
||||||
age_limit = 18
|
age_limit = 18
|
||||||
@ -177,14 +179,12 @@ class MetacafeIE(InfoExtractor):
|
|||||||
age_limit = 0
|
age_limit = 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'id': video_id,
|
||||||
'id': video_id,
|
'url': video_url,
|
||||||
'url': video_url,
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': None,
|
'title': video_title,
|
||||||
'title': video_title,
|
|
||||||
'thumbnail':thumbnail,
|
'thumbnail':thumbnail,
|
||||||
'ext': video_ext,
|
'ext': video_ext,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
compat_urllib_parse,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -15,8 +16,9 @@ class MixcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||||
'file': 'dholbach-cryptkeeper.mp3',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'dholbach-cryptkeeper',
|
||||||
|
'ext': 'mp3',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'Daniel Holbach',
|
||||||
@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group(1)
|
||||||
cloudcast_name = mobj.group(2)
|
cloudcast_name = mobj.group(2)
|
||||||
track_id = '-'.join((uploader, cloudcast_name))
|
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
||||||
|
|
||||||
webpage = self._download_webpage(url, track_id)
|
webpage = self._download_webpage(url, track_id)
|
||||||
|
|
||||||
|
@ -5,9 +5,12 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
|
HEADRequest,
|
||||||
|
unescapeHTML,
|
||||||
url_basename,
|
url_basename,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
)
|
)
|
||||||
@ -18,6 +21,7 @@ def _media_xml_tag(tag):
|
|||||||
|
|
||||||
|
|
||||||
class MTVServicesInfoExtractor(InfoExtractor):
|
class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
|
_MOBILE_TEMPLATE = None
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _id_from_uri(uri):
|
def _id_from_uri(uri):
|
||||||
return uri.split(':')[-1]
|
return uri.split(':')[-1]
|
||||||
@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
return thumb_node.attrib['url']
|
return thumb_node.attrib['url']
|
||||||
|
|
||||||
def _extract_video_formats(self, mdoc):
|
def _extract_mobile_video_formats(self, mtvn_id):
|
||||||
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
|
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||||
raise ExtractorError('This video is not available from your country.', expected=True)
|
req = compat_urllib_request.Request(webpage_url)
|
||||||
|
# Otherwise we get a webpage that would execute some javascript
|
||||||
|
req.add_header('Youtubedl-user-agent', 'curl/7')
|
||||||
|
webpage = self._download_webpage(req, mtvn_id,
|
||||||
|
'Downloading mobile page')
|
||||||
|
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||||
|
req = HEADRequest(metrics_url)
|
||||||
|
response = self._request_webpage(req, mtvn_id, 'Resolving url')
|
||||||
|
url = response.geturl()
|
||||||
|
# Transform the url to get the best quality:
|
||||||
|
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
|
||||||
|
return [{'url': url,'ext': 'mp4'}]
|
||||||
|
|
||||||
|
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||||
|
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||||
|
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||||
|
self.to_screen('The normal version is not available from your '
|
||||||
|
'country, trying with the mobile version')
|
||||||
|
return self._extract_mobile_video_formats(mtvn_id)
|
||||||
|
raise ExtractorError('This video is not available from your country.',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in mdoc.findall('.//rendition'):
|
for rendition in mdoc.findall('.//rendition'):
|
||||||
@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
raise ExtractorError('Could not find video title')
|
raise ExtractorError('Could not find video title')
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
|
|
||||||
|
# This a short id that's used in the webpage urls
|
||||||
|
mtvn_id = None
|
||||||
|
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
|
'scheme', 'urn:mtvn:id')
|
||||||
|
if mtvn_id_node is not None:
|
||||||
|
mtvn_id = mtvn_id_node.text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': self._extract_video_formats(mediagen_doc),
|
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import binascii
|
import binascii
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -14,18 +16,16 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MyVideoIE(InfoExtractor):
|
class MyVideoIE(InfoExtractor):
|
||||||
"""Information Extractor for myvideo.de."""
|
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
|
||||||
|
IE_NAME = 'myvideo'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
|
||||||
IE_NAME = u'myvideo'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||||
u'file': u'8229274.flv',
|
'md5': '2d2753e8130479ba2cb7e0a37002053e',
|
||||||
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '8229274',
|
||||||
u"title": u"bowling-fail-or-win"
|
'ext': 'flv',
|
||||||
|
'title': 'bowling-fail-or-win',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self,url):
|
def _real_extract(self,url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
video_id = mobj.group('id')
|
||||||
raise ExtractorError(u'invalid URL: %s' % url)
|
|
||||||
|
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
GK = (
|
GK = (
|
||||||
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
|
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
|
||||||
@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
|
|||||||
video_url = mobj.group(1) + '.flv'
|
video_url = mobj.group(1) + '.flv'
|
||||||
|
|
||||||
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
|
return {
|
||||||
|
'id': video_id,
|
||||||
return [{
|
'url': video_url,
|
||||||
'id': video_id,
|
'title': video_title,
|
||||||
'url': video_url,
|
}
|
||||||
'uploader': None,
|
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
|
||||||
'ext': video_ext,
|
|
||||||
}]
|
|
||||||
|
|
||||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||||
response = self._download_webpage(request, video_id,
|
response = self._download_webpage(request, video_id,
|
||||||
u'Downloading video info')
|
'Downloading video info')
|
||||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||||
return {'id': video_id,
|
return {
|
||||||
'title': info['title'],
|
'id': video_id,
|
||||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
'title': info['title'],
|
||||||
'play_path': info['filename'],
|
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||||
'ext': 'flv',
|
'play_path': info['filename'],
|
||||||
'thumbnail': info['thumbnail'][0]['url'],
|
'ext': 'flv',
|
||||||
}
|
'thumbnail': info['thumbnail'][0]['url'],
|
||||||
|
}
|
||||||
|
|
||||||
# try encxml
|
# try encxml
|
||||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Unable to extract video')
|
raise ExtractorError('Unable to extract video')
|
||||||
|
|
||||||
params = {}
|
params = {}
|
||||||
encxml = ''
|
encxml = ''
|
||||||
@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
params['domain'] = 'www.myvideo.de'
|
params['domain'] = 'www.myvideo.de'
|
||||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||||
if 'flash_playertype=MTV' in xmldata_url:
|
if 'flash_playertype=MTV' in xmldata_url:
|
||||||
self._downloader.report_warning(u'avoiding MTV player')
|
self._downloader.report_warning('avoiding MTV player')
|
||||||
xmldata_url = (
|
xmldata_url = (
|
||||||
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
|
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
|
||||||
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
|
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
|
||||||
@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
|
|||||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||||
if 'myvideo2flash' in video_url:
|
if 'myvideo2flash' in video_url:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
u'Rewriting URL to use unencrypted rtmp:// ...',
|
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||||
video_id)
|
video_id)
|
||||||
video_url = video_url.replace('rtmpe://', 'rtmp://')
|
video_url = video_url.replace('rtmpe://', 'rtmp://')
|
||||||
|
|
||||||
@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
|
|||||||
# extract non rtmp videos
|
# extract non rtmp videos
|
||||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'unable to extract url')
|
raise ExtractorError('unable to extract url')
|
||||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||||
|
|
||||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
|
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||||
video_file = compat_urllib_parse.unquote(video_file)
|
video_file = compat_urllib_parse.unquote(video_file)
|
||||||
|
|
||||||
if not video_file.endswith('f4m'):
|
if not video_file.endswith('f4m'):
|
||||||
ppath, prefix = video_file.split('.')
|
ppath, prefix = video_file.split('.')
|
||||||
video_playpath = '%s:%s' % (prefix, ppath)
|
video_playpath = '%s:%s' % (prefix, ppath)
|
||||||
video_hls_playlist = ''
|
|
||||||
else:
|
else:
|
||||||
video_playpath = ''
|
video_playpath = ''
|
||||||
video_hls_playlist = (
|
|
||||||
video_file
|
|
||||||
).replace('.f4m', '.m3u8')
|
|
||||||
|
|
||||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
|
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||||
|
|
||||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'tc_url': video_url,
|
'tc_url': video_url,
|
||||||
'uploader': None,
|
'title': video_title,
|
||||||
'upload_date': None,
|
'ext': 'flv',
|
||||||
'title': video_title,
|
'play_path': video_playpath,
|
||||||
'ext': u'flv',
|
'player_url': video_swfobj,
|
||||||
'play_path': video_playpath,
|
}
|
||||||
'video_file': video_file,
|
|
||||||
'video_hls_playlist': video_hls_playlist,
|
|
||||||
'player_url': video_swfobj,
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -12,10 +11,13 @@ class NineGagIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"url": "http://9gag.tv/v/1912",
|
"url": "http://9gag.tv/v/1912",
|
||||||
"file": "1912.mp4",
|
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "1912",
|
||||||
|
"ext": "mp4",
|
||||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
|
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
||||||
|
"view_count": int,
|
||||||
|
"thumbnail": "re:^https?://",
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube']
|
'add_ie': ['Youtube']
|
||||||
}
|
}
|
||||||
@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
data_json = self._html_search_regex(r'''(?x)
|
|
||||||
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
|
|
||||||
data-video-meta="([^"]+)"''', webpage, 'video metadata')
|
|
||||||
|
|
||||||
data = json.loads(data_json)
|
youtube_id = self._html_search_regex(
|
||||||
|
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
|
||||||
|
webpage, 'video ID')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
view_count_str = self._html_search_regex(
|
||||||
|
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
|
||||||
|
fatal=False)
|
||||||
|
view_count = (
|
||||||
|
None if view_count_str is None
|
||||||
|
else int(view_count_str.replace(',', '')))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': data['youtubeVideoId'],
|
'url': youtube_id,
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': data['title'],
|
'title': self._og_search_title(webpage),
|
||||||
'description': data['description'],
|
'description': description,
|
||||||
'view_count': int(data['view_count']),
|
'view_count': view_count,
|
||||||
'like_count': int(data['statistic']['like']),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'dislike_count': int(data['statistic']['dislike']),
|
|
||||||
'thumbnail': data['thumbnail_url'],
|
|
||||||
}
|
}
|
||||||
|
@ -1,20 +1,23 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unescapeHTML
|
from ..utils import unescapeHTML
|
||||||
|
|
||||||
|
|
||||||
class OoyalaIE(InfoExtractor):
|
class OoyalaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
|
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||||
u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||||
u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
|
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||||
u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||||
u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
|
'ext': 'mp4',
|
||||||
u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
|
||||||
|
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,13 +31,14 @@ class OoyalaIE(InfoExtractor):
|
|||||||
ie=cls.ie_key())
|
ie=cls.ie_key())
|
||||||
|
|
||||||
def _extract_result(self, info, more_info):
|
def _extract_result(self, info, more_info):
|
||||||
return {'id': info['embedCode'],
|
return {
|
||||||
'ext': 'mp4',
|
'id': info['embedCode'],
|
||||||
'title': unescapeHTML(info['title']),
|
'ext': 'mp4',
|
||||||
'url': info.get('ipad_url') or info['url'],
|
'title': unescapeHTML(info['title']),
|
||||||
'description': unescapeHTML(more_info['description']),
|
'url': info.get('ipad_url') or info['url'],
|
||||||
'thumbnail': more_info['promo'],
|
'description': unescapeHTML(more_info['description']),
|
||||||
}
|
'thumbnail': more_info['promo'],
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -42,22 +46,23 @@ class OoyalaIE(InfoExtractor):
|
|||||||
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
|
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
|
||||||
player = self._download_webpage(player_url, embedCode)
|
player = self._download_webpage(player_url, embedCode)
|
||||||
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
||||||
player, u'mobile player url')
|
player, 'mobile player url')
|
||||||
mobile_player = self._download_webpage(mobile_url, embedCode)
|
mobile_player = self._download_webpage(mobile_url, embedCode)
|
||||||
videos_info = self._search_regex(
|
videos_info = self._search_regex(
|
||||||
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
||||||
mobile_player, u'info').replace('\\"','"')
|
mobile_player, 'info').replace('\\"','"')
|
||||||
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
|
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
|
||||||
videos_info = json.loads(videos_info)
|
videos_info = json.loads(videos_info)
|
||||||
videos_more_info =json.loads(videos_more_info)
|
videos_more_info =json.loads(videos_more_info)
|
||||||
|
|
||||||
if videos_more_info.get('lineup'):
|
if videos_more_info.get('lineup'):
|
||||||
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
||||||
return {'_type': 'playlist',
|
return {
|
||||||
'id': embedCode,
|
'_type': 'playlist',
|
||||||
'title': unescapeHTML(videos_more_info['title']),
|
'id': embedCode,
|
||||||
'entries': videos,
|
'title': unescapeHTML(videos_more_info['title']),
|
||||||
}
|
'entries': videos,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
return self._extract_result(videos_info[0], videos_more_info)
|
return self._extract_result(videos_info[0], videos_more_info)
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -35,7 +36,15 @@ class ORFIE(InfoExtractor):
|
|||||||
data_json = self._search_regex(
|
data_json = self._search_regex(
|
||||||
r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
|
r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
|
||||||
all_data = json.loads(data_json)
|
all_data = json.loads(data_json)
|
||||||
sdata = all_data[0]['values']['segments']
|
|
||||||
|
def get_segments(all_data):
|
||||||
|
for data in all_data:
|
||||||
|
if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
|
||||||
|
return data['values']['segments']
|
||||||
|
|
||||||
|
sdata = get_segments(all_data)
|
||||||
|
if not sdata:
|
||||||
|
raise ExtractorError('Unable to extract segments')
|
||||||
|
|
||||||
def quality_to_int(s):
|
def quality_to_int(s):
|
||||||
m = re.search('([0-9]+)', s)
|
m = re.search('([0-9]+)', s)
|
||||||
|
53
youtube_dl/extractor/parliamentliveuk.py
Normal file
53
youtube_dl/extractor/parliamentliveuk.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ParliamentLiveUKIE(InfoExtractor):
|
||||||
|
IE_NAME = 'parliamentlive.tv'
|
||||||
|
IE_DESC = 'UK parliament videos'
|
||||||
|
_VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '15121',
|
||||||
|
'ext': 'asf',
|
||||||
|
'title': 'hoc home affairs committee, 18 mar 2014.pm',
|
||||||
|
'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # Requires mplayer (mms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
asx_url = self._html_search_regex(
|
||||||
|
r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
|
||||||
|
'metadata URL')
|
||||||
|
asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
|
||||||
|
video_url = asx.find('.//REF').attrib['HREF']
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'''(?x)player\.setClipDetails\(
|
||||||
|
(?:(?:[0-9]+|"[^"]+"),\s*){2}
|
||||||
|
"([^"]+",\s*"[^"]+)"
|
||||||
|
''',
|
||||||
|
webpage, 'title').replace('", "', ', ')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
|
||||||
|
webpage, 'description')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'ext': 'asf',
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
US_RATINGS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PBSIE(InfoExtractor):
|
class PBSIE(InfoExtractor):
|
||||||
@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
|
|||||||
# Article with embedded player
|
# Article with embedded player
|
||||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||||
# Player
|
# Player
|
||||||
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
|
|||||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||||
info = self._download_json(info_url, display_id)
|
info = self._download_json(info_url, display_id)
|
||||||
|
|
||||||
|
rating_str = info.get('rating')
|
||||||
|
if rating_str is not None:
|
||||||
|
rating_str = rating_str.rpartition('-')[2]
|
||||||
|
age_limit = US_RATINGS.get(rating_str)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
|
|||||||
'description': info['program'].get('description'),
|
'description': info['program'].get('description'),
|
||||||
'thumbnail': info.get('image_url'),
|
'thumbnail': info.get('image_url'),
|
||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
@ -1,76 +1,43 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
class PhotobucketIE(InfoExtractor):
|
class PhotobucketIE(InfoExtractor):
|
||||||
"""Information extractor for photobucket.com."""
|
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
||||||
|
|
||||||
# TODO: the original _VALID_URL was:
|
|
||||||
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
|
|
||||||
# Check if it's necessary to keep the old extracion process
|
|
||||||
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
|
||||||
IE_NAME = u'photobucket'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
||||||
u'file': u'zpsc0c3b9fa.mp4',
|
'file': 'zpsc0c3b9fa.mp4',
|
||||||
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
|
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"upload_date": u"20130504",
|
'upload_date': '20130504',
|
||||||
u"uploader": u"rachaneronas",
|
'uploader': 'rachaneronas',
|
||||||
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
|
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id from URL
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
video_extension = mobj.group('ext')
|
video_extension = mobj.group('ext')
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# Extract URL, uploader, and title from webpage
|
# Extract URL, uploader, and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
# We try first by looking the javascript code:
|
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||||
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
|
webpage, 'info json')
|
||||||
if mobj is not None:
|
info = json.loads(info_json)
|
||||||
info = json.loads(mobj.group('json'))
|
return {
|
||||||
return [{
|
'id': video_id,
|
||||||
'id': video_id,
|
'url': info['downloadUrl'],
|
||||||
'url': info[u'downloadUrl'],
|
'uploader': info['username'],
|
||||||
'uploader': info[u'username'],
|
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
|
||||||
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
|
'title': info['title'],
|
||||||
'title': info[u'title'],
|
'ext': video_extension,
|
||||||
'ext': video_extension,
|
'thumbnail': info['thumbUrl'],
|
||||||
'thumbnail': info[u'thumbUrl'],
|
}
|
||||||
}]
|
|
||||||
|
|
||||||
# We try looking in other parts of the webpage
|
|
||||||
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
|
|
||||||
webpage, u'video URL')
|
|
||||||
|
|
||||||
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Unable to extract title')
|
|
||||||
video_title = mobj.group(1).decode('utf-8')
|
|
||||||
video_uploader = mobj.group(2).decode('utf-8')
|
|
||||||
|
|
||||||
return [{
|
|
||||||
'id': video_id.decode('utf-8'),
|
|
||||||
'url': video_url.decode('utf-8'),
|
|
||||||
'uploader': video_uploader,
|
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
|
||||||
'ext': video_extension.decode('utf-8'),
|
|
||||||
}]
|
|
||||||
|
80
youtube_dl/extractor/playvid.py
Normal file
80
youtube_dl/extractor/playvid.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayvidIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
|
||||||
|
'md5': '44930f8afa616efdf9482daf4fe53e1e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'agbDDi7WZTV',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Michelle Lewin in Miami Beach',
|
||||||
|
'duration': 240,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_title = None
|
||||||
|
duration = None
|
||||||
|
video_thumbnail = None
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
# most of the information is stored in the flashvars
|
||||||
|
flashvars = self._html_search_regex(
|
||||||
|
r'flashvars="(.+?)"', webpage, 'flashvars')
|
||||||
|
|
||||||
|
infos = compat_urllib_parse.unquote(flashvars).split(r'&')
|
||||||
|
for info in infos:
|
||||||
|
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
|
||||||
|
if videovars_match:
|
||||||
|
key = videovars_match.group(1)
|
||||||
|
val = videovars_match.group(2)
|
||||||
|
|
||||||
|
if key == 'title':
|
||||||
|
video_title = compat_urllib_parse.unquote_plus(val)
|
||||||
|
if key == 'duration':
|
||||||
|
try:
|
||||||
|
duration = int(val)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
if key == 'big_thumb':
|
||||||
|
video_thumbnail = val
|
||||||
|
|
||||||
|
videourl_match = re.match(
|
||||||
|
r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
|
||||||
|
if videourl_match:
|
||||||
|
height = int(videourl_match.group('resolution'))
|
||||||
|
formats.append({
|
||||||
|
'height': height,
|
||||||
|
'url': val,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# Extract title - should be in the flashvars; if not, look elsewhere
|
||||||
|
if video_title is None:
|
||||||
|
video_title = self._html_search_regex(
|
||||||
|
r'<title>(.*?)</title', webpage, 'title')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': video_title,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'description': None,
|
||||||
|
'age_limit': 18
|
||||||
|
}
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
@ -27,6 +28,12 @@ class PornHubIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_count(self, pattern, webpage, name):
    """Search *webpage* for *pattern* and return the match as an integer.

    The search is non-fatal; when it yields nothing (or an empty
    value) that value is returned unchanged instead of being converted.
    *name* is only used to label the field ("<name> count").
    """
    label = '%s count' % name
    raw = self._html_search_regex(pattern, webpage, label, fatal=False)
    return str_to_int(raw) if raw else raw
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
@ -37,14 +44,22 @@ class PornHubIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, 'uploader', fatal=False)
|
video_uploader = self._html_search_regex(
|
||||||
|
r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||||
if thumbnail:
|
if thumbnail:
|
||||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||||
|
|
||||||
|
view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||||
|
like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||||
|
dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||||
|
comment_count = self._extract_count(
|
||||||
|
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
|
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@ -77,6 +92,10 @@ class PornHubIE(InfoExtractor):
|
|||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'dislike_count': dislike_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
'skip': 'Seems to be broken',
|
'skip': 'Seems to be broken',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
|
'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2437108',
|
'id': '2429369',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Folge 48: Gold Rogers Heimat',
|
'title': 'Countdown für die Autowerkstatt',
|
||||||
'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
|
'description': 'md5:809fc051a457b5d8666013bc40698817',
|
||||||
'upload_date': '20140226',
|
'upload_date': '20140223',
|
||||||
'duration': 1401.48,
|
'duration': 2595.04,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -6,16 +8,17 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class RadioFranceIE(InfoExtractor):
|
class RadioFranceIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||||
IE_NAME = u'radiofrance'
|
IE_NAME = 'radiofrance'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
|
'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
|
||||||
u'file': u'one-one.ogg',
|
'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
|
||||||
u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'one-one',
|
||||||
u"title": u"One to one",
|
'ext': 'ogg',
|
||||||
u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
"title": "One to one",
|
||||||
u"uploader": u"Thomas Hercouët",
|
"description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
||||||
|
"uploader": "Thomas Hercouët",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
|
|||||||
video_id = m.group('id')
|
video_id = m.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
|
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
|
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
|
||||||
webpage, u'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<div class="credit"> © (.*?)</div>',
|
r'<div class="credit"> © (.*?)</div>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
formats_str = self._html_search_regex(
|
formats_str = self._html_search_regex(
|
||||||
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
|
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
|
||||||
webpage, u'audio URLs')
|
webpage, 'audio URLs')
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': fm[0],
|
'format_id': fm[0],
|
||||||
'url': fm[1],
|
'url': fm[1],
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
|
'preference': i,
|
||||||
}
|
}
|
||||||
for fm in
|
for i, fm in
|
||||||
re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
|
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
|
||||||
]
|
]
|
||||||
# No sorting, we don't know any more about these formats
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
61
youtube_dl/extractor/rts.py
Normal file
61
youtube_dl/extractor/rts.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RTSIE(InfoExtractor):
    """Extractor for videos in the rts.ch television archives."""

    IE_DESC = 'RTS.ch'
    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'

    _TEST = {
        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
        'md5': '753b877968ad8afaeddccc374d4256a5',
        'info_dict': {
            'id': '3449373',
            'ext': 'mp4',
            'duration': 1488,
            'title': 'Les Enfants Terribles',
            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
            'uploader': 'Divers',
            'upload_date': '19680921',
            'timestamp': -40280400,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the archive page at *url*.

        The numeric id is taken from the URL and used to download the
        article's JSON description, from which all metadata and the
        stream URLs are read.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        all_info = self._download_json(
            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
        info = all_info['video']['JSONinfo']

        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
        duration = parse_duration(info.get('duration'))

        # Only unescape when the field is present, so a missing preview
        # image simply yields no thumbnail.
        thumbnail = info.get('preview_image_url')
        if thumbnail is not None:
            thumbnail = unescapeHTML(thumbnail)

        formats = [{
            'format_id': fid,
            'url': furl,
            # bitrate is read from a "-<N>k." fragment of the stream URL,
            # when there is one
            'tbr': int_or_none(self._search_regex(
                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
        } for fid, furl in info['streams'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': info['title'],
            'description': info.get('intro'),
            'duration': duration,
            'uploader': info.get('programName'),
            'timestamp': upload_timestamp,
            # previously computed but never returned
            'thumbnail': thumbnail,
        }
|
183
youtube_dl/extractor/rutv.py
Normal file
183
youtube_dl/extractor/rutv.py
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RUTVIE(InfoExtractor):
    """Extractor for the player.rutv.ru / player.vgtrk.com video player."""

    IE_DESC = 'RUTV.RU'
    _VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
            'info_dict': {
                'id': '774471',
                'ext': 'mp4',
                'title': 'Монологи на все времена',
                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
                'duration': 2906,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
            'info_dict': {
                'id': '774016',
                'ext': 'mp4',
                'title': 'Чужой в семье Сталина',
                'description': '',
                'duration': 2539,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
            'info_dict': {
                'id': '766888',
                'ext': 'mp4',
                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
                'duration': 279,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
            'info_dict': {
                'id': '771852',
                'ext': 'mp4',
                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
                'duration': 3096,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
            'info_dict': {
                'id': '51499',
                'ext': 'flv',
                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Translation has finished',
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        """Return the first embedded player URL found in *webpage*.

        An ``<iframe>`` pointing at the player is tried first, then an
        ``og:video`` ``<meta>`` tag pointing at the flash container.
        Returns None (implicitly) when neither is present.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

        # The closing-quote backreference sits outside the "url" group so
        # that the returned URL does not end with the quote character.
        mobj = re.search(
            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?)\2',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Return the info dict for the player URL *url*.

        Downloads the player's JSON description and builds one format per
        (transport, quality) pair listed for the first media item.

        Raises ExtractorError when the service reports errors or when no
        usable media link is found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_type = mobj.group('type')

        # flash container URLs carry no type, and "swf" iframes are
        # handled like regular videos
        if not video_type or video_type == 'swf':
            video_type = 'video'

        json_data = self._download_json(
            'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
            video_id, 'Downloading JSON')

        if json_data['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)

        playlist = json_data['data']['playlist']
        medialist = playlist['medialist']
        media = medialist[0]

        if media['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)

        view_count = playlist.get('count_views')
        priority_transport = playlist['priority_transport']

        thumbnail = media['picture']
        width = int_or_none(media['width'])
        height = int_or_none(media['height'])
        description = media['anons']
        title = media['title']
        duration = int_or_none(media.get('duration'))

        formats = []

        for transport, links in media['sources'].items():
            for quality, media_url in links.items():
                if transport == 'rtmp':
                    # split the link into connection URL, app and play path;
                    # links that do not fit this shape are skipped
                    rtmp_match = re.search(
                        r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', media_url)
                    if not rtmp_match:
                        continue
                    fmt = {
                        'url': rtmp_match.group('url'),
                        'play_path': rtmp_match.group('playpath'),
                        'app': rtmp_match.group('app'),
                        'page_url': 'http://player.rutv.ru',
                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
                        'rtmp_live': True,
                        'ext': 'flv',
                        'vbr': int(quality),
                    }
                elif transport == 'm3u8':
                    fmt = {
                        'url': media_url,
                        'ext': 'mp4',
                    }
                else:
                    fmt = {
                        'url': media_url
                    }
                fmt.update({
                    'width': width,
                    'height': height,
                    'format_id': '%s-%s' % (transport, quality),
                    # formats on the playlist's priority transport rank
                    # above those on any other transport
                    'preference': -1 if priority_transport == transport else -2,
                })
                formats.append(fmt)

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'duration': duration,
            'formats': formats,
        }
|
@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'id': '47127627',
|
'id': '47127627',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Goldrushed',
|
'title': 'Goldrushed',
|
||||||
|
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||||
'uploader': 'The Royal Concept',
|
'uploader': 'The Royal Concept',
|
||||||
'upload_date': '20120521',
|
'upload_date': '20120521',
|
||||||
},
|
},
|
||||||
@ -99,7 +100,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
def report_resolve(self, video_id):
|
def report_resolve(self, video_id):
|
||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
self.to_screen(u'%s: Resolving id' % video_id)
|
self.to_screen('%s: Resolving id' % video_id)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolv_url(cls, url):
|
def _resolv_url(cls, url):
|
||||||
@ -123,45 +124,46 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
formats = []
|
||||||
if info.get('downloadable', False):
|
if info.get('downloadable', False):
|
||||||
# We can build a direct link to the song
|
# We can build a direct link to the song
|
||||||
format_url = (
|
format_url = (
|
||||||
'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
||||||
track_id, self._CLIENT_ID))
|
track_id, self._CLIENT_ID))
|
||||||
result['formats'] = [{
|
formats.append({
|
||||||
'format_id': 'download',
|
'format_id': 'download',
|
||||||
'ext': info.get('original_format', 'mp3'),
|
'ext': info.get('original_format', 'mp3'),
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}]
|
'preference': 10,
|
||||||
else:
|
})
|
||||||
# We have to retrieve the url
|
|
||||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
|
||||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
|
||||||
stream_json = self._download_webpage(
|
|
||||||
streams_url,
|
|
||||||
track_id, 'Downloading track url')
|
|
||||||
|
|
||||||
formats = []
|
# We have to retrieve the url
|
||||||
format_dict = json.loads(stream_json)
|
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||||
for key, stream_url in format_dict.items():
|
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||||
if key.startswith(u'http'):
|
stream_json = self._download_webpage(
|
||||||
formats.append({
|
streams_url,
|
||||||
'format_id': key,
|
track_id, 'Downloading track url')
|
||||||
'ext': ext,
|
|
||||||
'url': stream_url,
|
format_dict = json.loads(stream_json)
|
||||||
'vcodec': 'none',
|
for key, stream_url in format_dict.items():
|
||||||
})
|
if key.startswith('http'):
|
||||||
elif key.startswith(u'rtmp'):
|
formats.append({
|
||||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
'format_id': key,
|
||||||
url, path = stream_url.split('mp3:', 1)
|
'ext': ext,
|
||||||
formats.append({
|
'url': stream_url,
|
||||||
'format_id': key,
|
'vcodec': 'none',
|
||||||
'url': url,
|
})
|
||||||
'play_path': 'mp3:' + path,
|
elif key.startswith('rtmp'):
|
||||||
'ext': ext,
|
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||||
'vcodec': 'none',
|
url, path = stream_url.split('mp3:', 1)
|
||||||
})
|
formats.append({
|
||||||
|
'format_id': key,
|
||||||
|
'url': url,
|
||||||
|
'play_path': 'mp3:' + path,
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
# We fallback to the stream_url in the original info, this
|
# We fallback to the stream_url in the original info, this
|
||||||
@ -187,7 +189,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
|
|
||||||
track_id = mobj.group('track_id')
|
track_id = mobj.group('track_id')
|
||||||
token = None
|
token = None
|
||||||
@ -217,7 +219,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||||
|
|
||||||
class SoundcloudSetIE(SoundcloudIE):
|
class SoundcloudSetIE(SoundcloudIE):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||||
IE_NAME = 'soundcloud:set'
|
IE_NAME = 'soundcloud:set'
|
||||||
# it's in tests/test_playlists.py
|
# it's in tests/test_playlists.py
|
||||||
_TESTS = []
|
_TESTS = []
|
||||||
@ -225,7 +227,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
|
|
||||||
# extract uploader (which is in the url)
|
# extract uploader (which is in the url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group(1)
|
||||||
@ -242,7 +244,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
if 'errors' in info:
|
if 'errors' in info:
|
||||||
for err in info['errors']:
|
for err in info['errors']:
|
||||||
self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
|
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
|
||||||
return
|
return
|
||||||
|
|
||||||
self.report_extraction(full_title)
|
self.report_extraction(full_title)
|
||||||
|
@ -1,10 +1,15 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class SpikeIE(MTVServicesInfoExtractor):
|
class SpikeIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
|
_VALID_URL = r'''(?x)https?://
|
||||||
|
(www\.spike\.com/(video-clips|episodes)/.+|
|
||||||
|
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
|
||||||
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||||
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
||||||
@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||||
|
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract the video, mapping mobile-site URLs to the desktop site.

    When *url* matched the mobile alternative of ``_VALID_URL`` (the
    ``mobile_id`` group is set), the equivalent ``video-clips`` URL is
    handed to the parent extractor; otherwise *url* is passed on as is.
    """
    mobile_id = re.search(self._VALID_URL, url).group('mobile_id')
    if mobile_id is None:
        return super(SpikeIE, self)._real_extract(url)
    desktop_url = 'http://www.spike.com/video-clips/%s' % mobile_id
    return super(SpikeIE, self)._real_extract(desktop_url)
|
||||||
|
@ -6,115 +6,120 @@ import re
|
|||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
RegexNotFoundError,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TEDIE(SubtitlesInfoExtractor):
|
class TEDIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL=r'''http://www\.ted\.com/
|
_VALID_URL = r'''(?x)
|
||||||
(
|
(?P<proto>https?://)
|
||||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
|
||||||
|
|
(
|
||||||
((?P<type_talk>talks)) # We have a simple talk
|
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||||
)
|
|
|
||||||
(/lang/(.*?))? # The url may contain the language
|
((?P<type_talk>talks)) # We have a simple talk
|
||||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
)
|
||||||
'''
|
(/lang/(.*?))? # The url may contain the language
|
||||||
|
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||||
|
.*)$
|
||||||
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||||
'file': '102.mp4',
|
|
||||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
|
'id': '102',
|
||||||
"title": "Dan Dennett: The illusion of consciousness"
|
'ext': 'mp4',
|
||||||
|
'title': 'The illusion of consciousness',
|
||||||
|
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||||
|
'argument that not only don\'t we understand our own '
|
||||||
|
'consciousness, but that half the time our brains are '
|
||||||
|
'actively fooling us.'),
|
||||||
|
'uploader': 'Dan Dennett',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
_FORMATS_PREFERENCE = {
|
||||||
def suitable(cls, url):
|
'low': 1,
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
'medium': 2,
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
'high': 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_info(self, webpage):
    """Return the decoded JSON object passed to ``q("<name>.init", ...)``.

    The JSON text is located in *webpage* with a regex anchored on the
    closing ``</script>`` tag and then parsed.
    """
    return json.loads(self._search_regex(
        r'q\("\w+.init",({.+})\)</script>', webpage, 'info json'))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
|
if m.group('type') == 'embed':
|
||||||
|
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
|
||||||
|
return self.url_result(desktop_url, 'TED')
|
||||||
|
name = m.group('name')
|
||||||
if m.group('type_talk'):
|
if m.group('type_talk'):
|
||||||
return self._talk_info(url)
|
return self._talk_info(url, name)
|
||||||
else :
|
else:
|
||||||
playlist_id=m.group('playlist_id')
|
return self._playlist_videos_info(url, name)
|
||||||
name=m.group('name')
|
|
||||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
|
||||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
|
||||||
|
|
||||||
|
def _playlist_videos_info(self, url, name):
|
||||||
def _playlist_videos_info(self, url, name, playlist_id):
|
|
||||||
'''Returns the videos of the playlist'''
|
'''Returns the videos of the playlist'''
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(url, name,
|
||||||
url, playlist_id, 'Downloading playlist webpage')
|
'Downloading playlist webpage')
|
||||||
matches = re.finditer(
|
info = self._extract_info(webpage)
|
||||||
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
playlist_info = info['playlist']
|
||||||
webpage)
|
|
||||||
|
|
||||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
|
||||||
webpage, 'playlist title')
|
|
||||||
|
|
||||||
playlist_entries = [
|
playlist_entries = [
|
||||||
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||||
for m in matches
|
for talk in info['talks']
|
||||||
]
|
]
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
playlist_entries,
|
||||||
|
playlist_id=compat_str(playlist_info['id']),
|
||||||
|
playlist_title=playlist_info['title'])
|
||||||
|
|
||||||
def _talk_info(self, url, video_id=0):
|
def _talk_info(self, url, video_name):
|
||||||
"""Return the video for the talk in the url"""
|
webpage = self._download_webpage(url, video_name)
|
||||||
m = re.match(self._VALID_URL, url,re.VERBOSE)
|
|
||||||
video_name = m.group('name')
|
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
|
||||||
self.report_extraction(video_name)
|
self.report_extraction(video_name)
|
||||||
# If the url includes the language we get the title translated
|
|
||||||
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
|
|
||||||
webpage, 'title')
|
|
||||||
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
|
||||||
webpage, 'json data')
|
|
||||||
info = json.loads(json_data)
|
|
||||||
desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
|
|
||||||
webpage, 'description', flags = re.DOTALL)
|
|
||||||
|
|
||||||
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
|
talk_info = self._extract_info(webpage)['talks'][0]
|
||||||
webpage, 'thumbnail')
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': stream['file'],
|
'url': format_url,
|
||||||
'format': stream['id']
|
'format_id': format_id,
|
||||||
} for stream in info['htmlStreams']]
|
'format': format_id,
|
||||||
|
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
|
||||||
video_id = info['id']
|
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_id = compat_str(talk_info['id'])
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, talk_info)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(video_id, webpage)
|
self._list_available_subtitles(video_id, talk_info)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
thumbnail = talk_info['thumb']
|
||||||
|
if not thumbnail.startswith('http'):
|
||||||
|
thumbnail = 'http://' + thumbnail
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': talk_info['title'],
|
||||||
|
'uploader': talk_info['speaker'],
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': desc,
|
'description': self._og_search_description(webpage),
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, talk_info):
|
||||||
try:
|
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
|
||||||
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
|
if languages:
|
||||||
languages = re.findall(r'(?:<option value=")(\S+)"', options)
|
sub_lang_list = {}
|
||||||
if languages:
|
for l in languages:
|
||||||
sub_lang_list = {}
|
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||||
for l in languages:
|
sub_lang_list[l] = url
|
||||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
return sub_lang_list
|
||||||
sub_lang_list[l] = url
|
else:
|
||||||
return sub_lang_list
|
|
||||||
except RegexNotFoundError:
|
|
||||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from youtube_dl.utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class TinyPicIE(InfoExtractor):
|
class TinyPicIE(InfoExtractor):
|
||||||
|
75
youtube_dl/extractor/toypics.py
Normal file
75
youtube_dl/extractor/toypics.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class ToypicsIE(InfoExtractor):
    """Extractor for a single video page under videos.toypics.net/view/<id>/."""

    # This class handles individual videos; ToypicsUserIE handles profiles.
    IE_DESC = 'Toypics video'
    _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
        'md5': '16e806ad6d6f58079d210fe30985e08b',
        'info_dict': {
            'id': '514',
            'ext': 'mp4',
            'title': 'Chance-Bulge\'d, 2',
            'age_limit': 18,
            'uploader': 'kidsune',
        }
    }

    def _real_extract(self, url):
        """Download the video page and scrape the media URL, title and uploader.

        Returns an info dict with 'id', 'url', 'title', 'uploader' and a
        fixed 'age_limit' of 18.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        page = self._download_webpage(url, video_id)

        # The player configuration embeds the media location as `src: "..."`.
        video_url = self._html_search_regex(
            r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"',
            page, 'video URL')
        title = self._html_search_regex(
            r'<title>Toypics - ([^<]+)</title>', page, 'title')
        # The dot is escaped so that only a literal "toypics.net" host matches.
        username = self._html_search_regex(
            r'toypics\.net/([^/"]+)" class="user-name">', page, 'username')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'uploader': username,
            'age_limit': 18,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class ToypicsUserIE(InfoExtractor):
    """Playlist extractor for the public videos listed on a Toypics user profile."""

    IE_DESC = 'Toypics user profile'
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'

    def _real_extract(self, url):
        """Collect every public video URL of the user and return a playlist.

        The profile page announces the number of public videos; the listing
        pages are then fetched one by one and the video links are gathered
        from each of them.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group('username')

        profile_page = self._download_webpage(
            url, username, note='Retrieving profile page')

        video_count = int(self._search_regex(
            r'public/">Public Videos \(([0-9]+)\)</a></li>', profile_page,
            'video count'))

        PAGE_SIZE = 8
        # Ceiling division: the smallest number of pages holding video_count
        # entries (0 pages for 0 videos, 1 page for 1..8 videos, ...).
        page_count = (video_count + PAGE_SIZE - 1) // PAGE_SIZE

        urls = []
        for n in range(1, page_count + 1):
            # Build the listing URL from the username rather than the raw
            # input URL, which may carry a trailing "?..." or "#..." part.
            lpage_url = 'http://videos.toypics.net/%s/public/%d' % (username, n)
            lpage = self._download_webpage(
                lpage_url, username,
                note='Downloading page %d/%d' % (n, page_count))
            urls.extend(
                re.findall(
                    r'<p class="video-entry-title">\n\s*<a href="(http://videos\.toypics\.net/view/[^"]+)">',
                    lpage))

        return {
            '_type': 'playlist',
            'id': username,
            'entries': [{
                '_type': 'url',
                'url': eurl,
                'ie_key': 'Toypics',
            } for eurl in urls]
        }
|
84
youtube_dl/extractor/tvigle.py
Normal file
84
youtube_dl/extractor/tvigle.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TvigleIE(InfoExtractor):
    """Extractor for tvigle.ru category pages that carry a video id in the query."""

    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the per-video XML document and turn it into an info dict.

        Metadata is read from attributes of the ``<video>`` element; one
        format is emitted for each of the known URL attributes that is set.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        xml_doc = self._download_xml(
            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id,
            video_id, 'Downloading video XML')
        video_el = xml_doc.find('./video')

        summary = video_el.get('anons')
        if summary:
            summary = clean_html(summary)

        info = {
            'id': video_id,
            'title': video_el.get('name'),
            'description': summary,
            # NOTE(review): read from the document root, not from <video> -
            # confirm that this is where the 'img' attribute actually lives.
            'thumbnail': xml_doc.get('img'),
            'upload_date': unified_strdate(video_el.get('date')),
            'like_count': int_or_none(video_el.get('vtp')),
            'age_limit': 18,
        }

        # (XML attribute holding the URL, human readable note); the position
        # in this table is used as the 'quality' rank of the format.
        quality_ladder = (('low_file', 'SQ'), ('file', 'HQ'), ('hd', 'HD 720'))
        formats = []
        for rank, (attr_name, note) in enumerate(quality_ladder):
            media_url = video_el.get(attr_name)
            if media_url:
                formats.append({
                    'url': media_url,
                    'format_id': attr_name,
                    'format_note': note,
                    'quality': rank,
                })
        self._sort_formats(formats)

        info['formats'] = formats
        return info
|
164
youtube_dl/extractor/udemy.py
Normal file
164
youtube_dl/extractor/udemy.py
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UdemyIE(InfoExtractor):
    """Extractor for a single Udemy lecture; requires account credentials."""

    IE_NAME = 'udemy'
    _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
    _LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
    _NETRC_MACHINE = 'udemy'

    _TESTS = [{
        'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
        'md5': '98eda5b657e752cf945d8445e261b5c5',
        'info_dict': {
            'id': '160614',
            'ext': 'mp4',
            'title': 'Introduction and Installation',
            'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
            'duration': 579.29,
        },
        'skip': 'Requires udemy account credentials',
    }]

    def _handle_error(self, response):
        """Raise ExtractorError if a decoded JSON response carries an 'error' entry.

        Non-dict responses and responses without an error are ignored.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
            error_data = error.get('data')
            if error_data:
                error_str += ' - %s' % error_data.get('formErrors')
            raise ExtractorError(error_str, expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON via the base class and fail on API-level errors.

        Any additional positional/keyword arguments are forwarded unchanged
        to the base implementation.
        """
        response = super(UdemyIE, self)._download_json(url, video_id, note, *args, **kwargs)
        self._handle_error(response)
        return response

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in with the configured credentials.

        Raises ExtractorError when no username is available or when the
        login response does not contain 'returnUrl'.
        """
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                'Udemy account is required, use --username and --password options to provide account credentials.',
                expected=True)

        login_popup = self._download_webpage(
            'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
            'Downloading login popup')

        # Getting only this redirect snippet instead of a login form is
        # treated as "nothing to do" (presumably: already logged in).
        if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
            return

        csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')

        login_form = {
            'email': username,
            'password': password,
            'csrf': csrf,
            'displayType': 'json',
            'isSubmitted': '1',
        }
        # The request body must be bytes on Python 3; the urlencoded form is
        # plain ASCII, so encoding it is a no-op on Python 2.
        post_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
        response = self._download_json(request, None, 'Logging in as %s' % username)

        if 'returnUrl' not in response:
            raise ExtractorError('Unable to log in')

    def _real_extract(self, url):
        """Resolve a lecture URL to an info dict, or delegate to the YouTube extractor.

        Raises ExtractorError when the lecture's asset type is not 'Video'.
        """
        mobj = re.match(self._VALID_URL, url)
        lecture_id = mobj.group('id')

        lecture = self._download_json(
            'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id,
            lecture_id, 'Downloading lecture JSON')

        if lecture['assetType'] != 'Video':
            raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)

        asset = lecture['asset']

        # Lectures whose stream URL points at a YouTube watch page are handed
        # over to the YouTube extractor.
        stream_url = asset['streamUrl']
        mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
        if mobj:
            return self.url_result(mobj.group(1), 'Youtube')

        video_id = asset['id']
        thumbnail = asset['thumbnailUrl']
        duration = asset['data']['duration']

        download_url = asset['downloadUrl']

        formats = [
            {
                # NOTE(review): the key says 480p while the label says 360p -
                # confirm which one is correct.
                'url': download_url['Video480p'][0],
                'format_id': '360p',
            },
            {
                'url': download_url['Video'][0],
                'format_id': '720p',
            },
        ]

        title = lecture['title']
        description = lecture['description']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
|
||||||
|
|
||||||
|
|
||||||
|
class UdemyCourseIE(UdemyIE):
    """Playlist extractor that expands a Udemy course URL into its video lectures."""

    IE_NAME = 'udemy:course'
    _VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
    # Markers looked up in the enrollment response page.
    _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
    _ALREADY_ENROLLED = '>You are already taking this course.<'
    _TESTS = []

    @classmethod
    def suitable(cls, url):
        """Accept a URL only if the single-lecture extractor does not claim it."""
        if UdemyIE.suitable(url):
            return False
        return super(UdemyCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Enroll in the course and return a playlist of its video lectures."""
        course_path = re.match(self._VALID_URL, url).group('coursepath')

        course = self._download_json(
            'https://www.udemy.com/api-1.1/courses/%s' % course_path,
            course_path, 'Downloading course JSON')
        course_id = int(course['id'])
        course_title = course['title']

        enroll_page = self._download_webpage(
            'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id,
            course_id, 'Enrolling in the course')

        # Report the enrollment outcome; any other response is passed over
        # silently.
        if self._SUCCESSFULLY_ENROLLED in enroll_page:
            self.to_screen('%s: Successfully enrolled in' % course_id)
        elif self._ALREADY_ENROLLED in enroll_page:
            self.to_screen('%s: Already enrolled in' % course_id)

        curriculum = self._download_json(
            'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
            course_id, 'Downloading course curriculum')

        # Only curriculum items whose asset type is 'Video' become entries.
        entries = []
        for item in curriculum:
            if item.get('assetType') != 'Video':
                continue
            lecture_url = 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, item['id'])
            entries.append(self.url_result(lecture_url, 'Udemy'))

        return self.playlist_result(entries, course_id, course_title)
|
@ -4,14 +4,11 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import ExtractorError
|
||||||
ExtractorError,
|
from .rutv import RUTVIE
|
||||||
int_or_none
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VestiIE(InfoExtractor):
|
class VestiIE(InfoExtractor):
|
||||||
IE_NAME = 'vesti'
|
|
||||||
IE_DESC = 'Вести.Ru'
|
IE_DESC = 'Вести.Ru'
|
||||||
_VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
|
_VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
|
||||||
|
|
||||||
@ -30,6 +27,20 @@ class VestiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.vesti.ru/doc.html?id=1349233',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '773865',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Участники митинга штурмуют Донецкую областную администрацию',
|
||||||
|
'description': 'md5:1a160e98b3195379b4c849f2f4958009',
|
||||||
|
'duration': 210,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -44,6 +55,20 @@ class VestiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://hitech.vesti.ru/news/view/id/4000',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '766888',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
|
||||||
|
'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
|
||||||
|
'duration': 279,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -57,7 +82,7 @@ class VestiIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Blocked outside Russia'
|
'skip': 'Blocked outside Russia',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
|
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
|
||||||
@ -72,7 +97,7 @@ class VestiIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Translation has finished'
|
'skip': 'Translation has finished'
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -81,90 +106,16 @@ class VestiIE(InfoExtractor):
|
|||||||
|
|
||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page)
|
mobj = re.search(
|
||||||
|
r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
|
||||||
|
page)
|
||||||
if mobj:
|
if mobj:
|
||||||
video_type = 'video'
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
else:
|
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
||||||
mobj = re.search(
|
'Downloading video page')
|
||||||
r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page)
|
|
||||||
|
|
||||||
if not mobj:
|
rutv_url = RUTVIE._extract_url(page)
|
||||||
raise ExtractorError('No media found')
|
if rutv_url:
|
||||||
|
return self.url_result(rutv_url, 'RUTV')
|
||||||
|
|
||||||
video_type = mobj.group('type')
|
raise ExtractorError('No video found', expected=True)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
json_data = self._download_json(
|
|
||||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
|
||||||
video_id, 'Downloading JSON')
|
|
||||||
|
|
||||||
if json_data['errors']:
|
|
||||||
raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
|
|
||||||
|
|
||||||
playlist = json_data['data']['playlist']
|
|
||||||
medialist = playlist['medialist']
|
|
||||||
media = medialist[0]
|
|
||||||
|
|
||||||
if media['errors']:
|
|
||||||
raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
|
|
||||||
|
|
||||||
view_count = playlist.get('count_views')
|
|
||||||
priority_transport = playlist['priority_transport']
|
|
||||||
|
|
||||||
thumbnail = media['picture']
|
|
||||||
width = media['width']
|
|
||||||
height = media['height']
|
|
||||||
description = media['anons']
|
|
||||||
title = media['title']
|
|
||||||
duration = int_or_none(media.get('duration'))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
for transport, links in media['sources'].items():
|
|
||||||
for quality, url in links.items():
|
|
||||||
if transport == 'rtmp':
|
|
||||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
|
||||||
if not mobj:
|
|
||||||
continue
|
|
||||||
fmt = {
|
|
||||||
'url': mobj.group('url'),
|
|
||||||
'play_path': mobj.group('playpath'),
|
|
||||||
'app': mobj.group('app'),
|
|
||||||
'page_url': 'http://player.rutv.ru',
|
|
||||||
'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
|
|
||||||
'rtmp_live': True,
|
|
||||||
'ext': 'flv',
|
|
||||||
'vbr': int(quality),
|
|
||||||
}
|
|
||||||
elif transport == 'm3u8':
|
|
||||||
fmt = {
|
|
||||||
'url': url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
fmt = {
|
|
||||||
'url': url
|
|
||||||
}
|
|
||||||
fmt.update({
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'format_id': '%s-%s' % (transport, quality),
|
|
||||||
'preference': -1 if priority_transport == transport else -2,
|
|
||||||
})
|
|
||||||
formats.append(fmt)
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
raise ExtractorError('No media links available for %s' % video_id)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'view_count': view_count,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import datetime
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -22,6 +21,7 @@ class VevoIE(InfoExtractor):
|
|||||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||||
vevo:)
|
vevo:)
|
||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
@ -34,6 +34,8 @@ class VevoIE(InfoExtractor):
|
|||||||
"duration": 230.12,
|
"duration": 230.12,
|
||||||
"width": 1920,
|
"width": 1920,
|
||||||
"height": 1080,
|
"height": 1080,
|
||||||
|
# timestamp and upload_date are often incorrect; seem to change randomly
|
||||||
|
'timestamp': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'v3 SMIL format',
|
'note': 'v3 SMIL format',
|
||||||
@ -47,6 +49,7 @@ class VevoIE(InfoExtractor):
|
|||||||
'title': 'I Wish I Could Break Your Heart',
|
'title': 'I Wish I Could Break Your Heart',
|
||||||
'duration': 226.101,
|
'duration': 226.101,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'timestamp': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'Age-limited video',
|
'note': 'Age-limited video',
|
||||||
@ -57,7 +60,8 @@ class VevoIE(InfoExtractor):
|
|||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'title': 'Tunnel Vision (Explicit)',
|
'title': 'Tunnel Vision (Explicit)',
|
||||||
'uploader': 'Justin Timberlake',
|
'uploader': 'Justin Timberlake',
|
||||||
'upload_date': '20130704',
|
'upload_date': 're:2013070[34]',
|
||||||
|
'timestamp': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'true',
|
'skip_download': 'true',
|
||||||
@ -169,13 +173,13 @@ class VevoIE(InfoExtractor):
|
|||||||
|
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': video_info['imageUrl'],
|
'thumbnail': video_info['imageUrl'],
|
||||||
'upload_date': upload_date.strftime('%Y%m%d'),
|
'timestamp': timestamp_ms // 1000,
|
||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
@ -29,6 +29,7 @@ class VideoBamIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'pqLvq',
|
'id': 'pqLvq',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': '_',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -61,7 +62,7 @@ class VideoBamIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._og_search_title(page, default='VideoBam', fatal=False)
|
title = self._og_search_title(page, default='_', fatal=False)
|
||||||
description = self._og_search_description(page, default=None)
|
description = self._og_search_description(page, default=None)
|
||||||
thumbnail = self._og_search_thumbnail(page)
|
thumbnail = self._og_search_thumbnail(page)
|
||||||
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
||||||
|
@ -1,22 +1,23 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
from ..utils import (
|
from ..utils import compat_urlparse
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class VideoDetectiveIE(InfoExtractor):
|
class VideoDetectiveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
|
'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
|
||||||
u'file': u'194487.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '194487',
|
||||||
u'title': u'KICK-ASS 2',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
|
'title': 'KICK-ASS 2',
|
||||||
u'duration': 135,
|
'description': 'md5:65ba37ad619165afac7d432eaded6013',
|
||||||
|
'duration': 135,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
og_video = self._og_search_video_url(webpage)
|
og_video = self._og_search_video_url(webpage)
|
||||||
query = compat_urlparse.urlparse(og_video).query
|
query = compat_urlparse.urlparse(og_video).query
|
||||||
return self.url_result(InternetVideoArchiveIE._build_url(query),
|
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
|
||||||
ie=InternetVideoArchiveIE.ie_key())
|
|
||||||
|
70
youtube_dl/extractor/videolecturesnet.py
Normal file
70
youtube_dl/extractor/videolecturesnet.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
find_xpath_attr,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VideoLecturesNetIE(InfoExtractor):
    """Extractor for videolectures.net lectures, driven by the lecture's SMIL file."""

    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
    IE_NAME = 'videolectures.net'

    _TEST = {
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            # Raw string: same value as before, without an invalid "\." escape.
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Download the SMIL description of the lecture and build the info dict.

        Title and date come from mandatory ``<meta>`` elements, the abstract
        and thumbnail are optional, and one format is produced for every
        ``<video>`` child of ``<switch>`` whose ``proto`` is ``http``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
        smil = self._download_xml(smil_url, video_id)

        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
        description = (
            None if description_el is None
            else description_el.attrib['content'])
        upload_date = unified_strdate(
            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])

        switch = smil.find('.//switch')
        duration = parse_duration(switch.attrib.get('dur'))
        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
        thumbnail = (
            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))

        formats = []
        for v in switch.findall('./video'):
            if v.attrib.get('proto') != 'http':
                continue
            # systemBitrate is optional: only scale it when present, so a
            # missing attribute gives tbr=None instead of a TypeError.
            bitrate = int_or_none(v.attrib.get('systemBitrate'))
            formats.append({
                'url': v.attrib['src'],
                'width': int_or_none(v.attrib.get('width')),
                'height': int_or_none(v.attrib.get('height')),
                'filesize': int_or_none(v.attrib.get('size')),
                'tbr': None if bitrate is None else bitrate / 1000.0,
                'ext': v.attrib.get('ext'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@ -1,29 +1,33 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
US_RATINGS,
|
||||||
)
|
)
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class VikiIE(SubtitlesInfoExtractor):
|
class VikiIE(SubtitlesInfoExtractor):
|
||||||
IE_NAME = u'viki'
|
IE_NAME = 'viki'
|
||||||
|
|
||||||
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
|
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||||
u'file': u'1023585v.mp4',
|
'md5': 'a21454021c2646f5433514177e2caa5f',
|
||||||
u'md5': u'a21454021c2646f5433514177e2caa5f',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '1023585v',
|
||||||
u'title': u'Heirs Episode 14',
|
'ext': 'mp4',
|
||||||
u'uploader': u'SBS',
|
'title': 'Heirs Episode 14',
|
||||||
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
'uploader': 'SBS',
|
||||||
u'upload_date': u'20131121',
|
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||||
u'age_limit': 13,
|
'upload_date': '20131121',
|
||||||
|
'age_limit': 13,
|
||||||
},
|
},
|
||||||
u'skip': u'Blocked in the US',
|
'skip': 'Blocked in the US',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
rating_str = self._html_search_regex(
|
rating_str = self._html_search_regex(
|
||||||
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
||||||
u'rating information', default='').strip()
|
'rating information', default='').strip()
|
||||||
RATINGS = {
|
age_limit = US_RATINGS.get(rating_str)
|
||||||
'G': 0,
|
|
||||||
'PG': 10,
|
|
||||||
'PG-13': 13,
|
|
||||||
'R': 16,
|
|
||||||
'NC': 18,
|
|
||||||
}
|
|
||||||
age_limit = RATINGS.get(rating_str)
|
|
||||||
|
|
||||||
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
||||||
info_webpage = self._download_webpage(
|
info_webpage = self._download_webpage(
|
||||||
info_url, video_id, note=u'Downloading info page')
|
info_url, video_id, note='Downloading info page')
|
||||||
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
u'Video %s is blocked from your location.' % video_id,
|
'Video %s is blocked from your location.' % video_id,
|
||||||
expected=True)
|
expected=True)
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
|
r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
|
||||||
|
|
||||||
upload_date_str = self._html_search_regex(
|
upload_date_str = self._html_search_regex(
|
||||||
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
|
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
|
||||||
upload_date = (
|
upload_date = (
|
||||||
unified_strdate(upload_date_str)
|
unified_strdate(upload_date_str)
|
||||||
if upload_date_str is not None
|
if upload_date_str is not None
|
||||||
|
@ -8,6 +8,7 @@ import itertools
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_HTTPError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
clean_html,
|
clean_html,
|
||||||
@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
if VimeoChannelIE.suitable(url):
|
||||||
|
# Otherwise channel urls like http://vimeo.com/channels/31259 would
|
||||||
|
# match
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return super(VimeoIE, cls).suitable(url)
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
webpage = self._download_webpage(request, video_id)
|
try:
|
||||||
|
webpage = self._download_webpage(request, video_id)
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||||
|
errmsg = ee.cause.read()
|
||||||
|
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Cannot download embed-only video without embedding '
|
||||||
|
'URL. Please call youtube-dl with the URL of the page '
|
||||||
|
'that embeds this video.',
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
# Now we begin extracting as much information as we can from what we
|
# Now we begin extracting as much information as we can from what we
|
||||||
# retrieved. First we extract the information common to all extractors,
|
# retrieved. First we extract the information common to all extractors,
|
||||||
@ -221,7 +242,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
# Extract video thumbnail
|
# Extract video thumbnail
|
||||||
video_thumbnail = config["video"].get("thumbnail")
|
video_thumbnail = config["video"].get("thumbnail")
|
||||||
if video_thumbnail is None:
|
if video_thumbnail is None:
|
||||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
|
video_thumbs = config["video"].get("thumbs")
|
||||||
|
if video_thumbs and isinstance(video_thumbs, dict):
|
||||||
|
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||||
|
|
||||||
# Extract video description
|
# Extract video description
|
||||||
video_description = None
|
video_description = None
|
||||||
@ -318,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
class VimeoChannelIE(InfoExtractor):
|
class VimeoChannelIE(InfoExtractor):
|
||||||
IE_NAME = 'vimeo:channel'
|
IE_NAME = 'vimeo:channel'
|
||||||
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
|
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
|
||||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class VKIE(InfoExtractor):
|
class VKIE(InfoExtractor):
|
||||||
IE_NAME = 'vk.com'
|
IE_NAME = 'vk.com'
|
||||||
_VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
|
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
||||||
_NETRC_MACHINE = 'vk'
|
_NETRC_MACHINE = 'vk'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -42,6 +42,18 @@ class VKIE(InfoExtractor):
|
|||||||
'duration': 558,
|
'duration': 558,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'note': 'Embedded video',
|
||||||
|
'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
|
||||||
|
'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '162925554',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Vladimir Gavrin',
|
||||||
|
'title': 'Lin Dan',
|
||||||
|
'duration': 101,
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vk.com/video-8871596_164049491',
|
'url': 'http://vk.com/video-8871596_164049491',
|
||||||
'md5': 'a590bcaf3d543576c9bd162812387666',
|
'md5': 'a590bcaf3d543576c9bd162812387666',
|
||||||
@ -54,7 +66,7 @@ class VKIE(InfoExtractor):
|
|||||||
'duration': 8352,
|
'duration': 8352,
|
||||||
},
|
},
|
||||||
'skip': 'Requires vk account credentials',
|
'skip': 'Requires vk account credentials',
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@ -82,7 +94,10 @@ class VKIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
|
if not video_id:
|
||||||
|
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
|
||||||
|
|
||||||
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
|
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
|
||||||
info_page = self._download_webpage(info_url, video_id)
|
info_page = self._download_webpage(info_url, video_id)
|
||||||
|
@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||||
'md5': 'f81dcf6d0448e3291f54380181695821',
|
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'YL2qNPkqon',
|
'id': 'YL2qNPkqon',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
114
youtube_dl/extractor/wdr.py
Normal file
114
youtube_dl/extractor/wdr.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
compat_urlparse,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WDRIE(InfoExtractor):
|
||||||
|
_PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
|
||||||
|
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-362427',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Servicezeit',
|
||||||
|
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
||||||
|
'upload_date': '20140310',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-363194',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Marga Spiegel ist tot',
|
||||||
|
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||||
|
'upload_date': '20140311',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
|
||||||
|
'md5': '83e9e8fefad36f357278759870805898',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-194332',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
||||||
|
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||||
|
'upload_date': '20091129',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
|
||||||
|
'md5': 'cfff440d4ee64114083ac44676df5d15',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-363068',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Grenzenlos lecker - Baklava',
|
||||||
|
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
||||||
|
'upload_date': '20140311',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
page_url = mobj.group('url')
|
||||||
|
page_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
|
if mobj.group('player') is None:
|
||||||
|
entries = [
|
||||||
|
self.url_result(page_url + href, 'WDR')
|
||||||
|
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
|
||||||
|
]
|
||||||
|
return self.playlist_result(entries, page_id)
|
||||||
|
|
||||||
|
flashvars = compat_urlparse.parse_qs(
|
||||||
|
self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
|
||||||
|
|
||||||
|
page_id = flashvars['trackerClipId'][0]
|
||||||
|
video_url = flashvars['dslSrc'][0]
|
||||||
|
title = flashvars['trackerClipTitle'][0]
|
||||||
|
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
||||||
|
|
||||||
|
if 'trackerClipAirTime' in flashvars:
|
||||||
|
upload_date = flashvars['trackerClipAirTime'][0]
|
||||||
|
else:
|
||||||
|
upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')
|
||||||
|
|
||||||
|
if upload_date:
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
if video_url.endswith('.f4m'):
|
||||||
|
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
||||||
|
ext = 'flv'
|
||||||
|
else:
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
|
description = self._html_search_meta('Description', webpage, 'description')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': page_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': ext,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -7,14 +9,14 @@ class WorldStarHipHopIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
|
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
|
"url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO",
|
||||||
"file": "wshh6a7q1ny0G34ZwuIO.mp4",
|
|
||||||
"md5": "9d04de741161603bf7071bbf4e883186",
|
"md5": "9d04de741161603bf7071bbf4e883186",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "wshh6a7q1ny0G34ZwuIO",
|
||||||
|
"ext": "mp4",
|
||||||
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('id')
|
video_id = m.group('id')
|
||||||
@ -23,41 +25,32 @@ class WorldStarHipHopIE(InfoExtractor):
|
|||||||
|
|
||||||
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
||||||
webpage_src)
|
webpage_src)
|
||||||
|
|
||||||
if m_vevo_id is not None:
|
if m_vevo_id is not None:
|
||||||
self.to_screen(u'Vevo video detected:')
|
|
||||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||||
|
|
||||||
video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
|
video_url = self._search_regex(
|
||||||
webpage_src, u'video URL')
|
r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')
|
||||||
|
|
||||||
if 'youtube' in video_url:
|
if 'youtube' in video_url:
|
||||||
self.to_screen(u'Youtube video detected:')
|
|
||||||
return self.url_result(video_url, ie='Youtube')
|
return self.url_result(video_url, ie='Youtube')
|
||||||
|
|
||||||
if 'mp4' in video_url:
|
video_title = self._html_search_regex(
|
||||||
ext = 'mp4'
|
r"<title>(.*)</title>", webpage_src, 'title')
|
||||||
else:
|
|
||||||
ext = 'flv'
|
|
||||||
|
|
||||||
video_title = self._html_search_regex(r"<title>(.*)</title>",
|
|
||||||
webpage_src, u'title')
|
|
||||||
|
|
||||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
||||||
thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
|
thumbnail = self._html_search_regex(
|
||||||
webpage_src, u'thumbnail', fatal=False)
|
r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
|
||||||
|
fatal=False)
|
||||||
if not thumbnail:
|
if not thumbnail:
|
||||||
_title = r"""candytitles.*>(.*)</span>"""
|
_title = r"""candytitles.*>(.*)</span>"""
|
||||||
mobj = re.search(_title, webpage_src)
|
mobj = re.search(_title, webpage_src)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
video_title = mobj.group(1)
|
video_title = mobj.group(1)
|
||||||
|
|
||||||
results = [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url' : video_url,
|
'url': video_url,
|
||||||
'title' : video_title,
|
'title': video_title,
|
||||||
'thumbnail' : thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'ext' : ext,
|
}
|
||||||
}]
|
|
||||||
return results
|
|
||||||
|
50
youtube_dl/extractor/xbef.py
Normal file
50
youtube_dl/extractor/xbef.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class XBefIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
|
||||||
|
'md5': 'a478b565baff61634a98f5e5338be995',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5119',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:7358a9faef8b7b57acda7c04816f170e',
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
|
||||||
|
|
||||||
|
config_url_enc = self._download_webpage(
|
||||||
|
'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id,
|
||||||
|
note='Retrieving config URL')
|
||||||
|
config_url = compat_urllib_parse.unquote(config_url_enc)
|
||||||
|
config = self._download_xml(
|
||||||
|
config_url, video_id, note='Retrieving config')
|
||||||
|
|
||||||
|
video_url = config.find('./file').text
|
||||||
|
thumbnail = config.find('./image').text
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
|
@ -1,55 +1,49 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class XNXXIE(InfoExtractor):
|
class XNXXIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
|
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
|
||||||
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
|
||||||
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
|
||||||
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
||||||
u'file': u'1135332.flv',
|
'md5': '0831677e2b4761795f68d417e0b7b445',
|
||||||
u'md5': u'0831677e2b4761795f68d417e0b7b445',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '1135332',
|
||||||
u"title": u"lida \u00bb Naked Funny Actress (5)",
|
'ext': 'flv',
|
||||||
u"age_limit": 18,
|
'title': 'lida » Naked Funny Actress (5)',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
video_id = mobj.group('id')
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
# Get webpage content
|
# Get webpage content
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(self.VIDEO_URL_RE,
|
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||||
webpage, u'video URL')
|
webpage, 'video URL')
|
||||||
video_url = compat_urllib_parse.unquote(video_url)
|
video_url = compat_urllib_parse.unquote(video_url)
|
||||||
|
|
||||||
video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
|
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
|
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
||||||
webpage, u'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': None,
|
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': None,
|
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}]
|
}
|
||||||
|
@ -1,25 +1,29 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
parse_duration,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class XTubeIE(InfoExtractor):
|
class XTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
||||||
'file': 'kVTUy_G222_.mp4',
|
|
||||||
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "strange erotica",
|
'id': 'kVTUy_G222_',
|
||||||
"description": "surreal gay themed erotica...almost an ET kind of thing",
|
'ext': 'mp4',
|
||||||
"uploader": "greenshowers",
|
'title': 'strange erotica',
|
||||||
"age_limit": 18,
|
'description': 'surreal gay themed erotica...almost an ET kind of thing',
|
||||||
|
'uploader': 'greenshowers',
|
||||||
|
'duration': 450,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,25 +36,79 @@ class XTubeIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
video_uploader = self._html_search_regex(
|
||||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
|
r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
||||||
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
video_description = self._html_search_regex(
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
|
||||||
extension = os.path.splitext(path)[1][1:]
|
duration = parse_duration(self._html_search_regex(
|
||||||
format = path.split('/')[5].split('_')[:2]
|
r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
|
||||||
format[0] += 'p'
|
view_count = self._html_search_regex(
|
||||||
format[1] += 'k'
|
r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
|
||||||
format = "-".join(format)
|
if view_count:
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
|
comment_count = self._html_search_regex(
|
||||||
|
r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
|
||||||
|
if comment_count:
|
||||||
|
comment_count = str_to_int(comment_count)
|
||||||
|
|
||||||
|
player_quality_option = json.loads(self._html_search_regex(
|
||||||
|
r'playerQualityOption = ({.+?});', webpage, 'player quality option'))
|
||||||
|
|
||||||
|
QUALITIES = ['3gp', 'mp4_normal', 'mp4_high', 'flv', 'mp4_ultra', 'mp4_720', 'mp4_1080']
|
||||||
|
formats = [
|
||||||
|
{
|
||||||
|
'url': furl,
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': QUALITIES.index(format_id) if format_id in QUALITIES else -1,
|
||||||
|
} for format_id, furl in player_quality_option.items()
|
||||||
|
]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'url': video_url,
|
'duration': duration,
|
||||||
'ext': extension,
|
'view_count': view_count,
|
||||||
'format': format,
|
'comment_count': comment_count,
|
||||||
'format_id': format,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class XTubeUserIE(InfoExtractor):
|
||||||
|
IE_DESC = 'XTube user profile'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
username = mobj.group('username')
|
||||||
|
|
||||||
|
profile_page = self._download_webpage(
|
||||||
|
url, username, note='Retrieving profile page')
|
||||||
|
|
||||||
|
video_count = int(self._search_regex(
|
||||||
|
r'<strong>%s\'s Videos \(([0-9]+)\)</strong>'%username, profile_page,
|
||||||
|
'video count'))
|
||||||
|
|
||||||
|
PAGE_SIZE = 25
|
||||||
|
urls = []
|
||||||
|
page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE
|
||||||
|
for n in range(1, page_count + 1):
|
||||||
|
lpage_url = 'http://www.xtube.com/user_videos.php?page=%d&u=%s' % (n, username)
|
||||||
|
lpage = self._download_webpage(
|
||||||
|
lpage_url, username,
|
||||||
|
note='Downloading page %d/%d' % (n, page_count))
|
||||||
|
urls.extend(
|
||||||
|
re.findall(r'addthis:url="([^"]+)"', lpage))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': username,
|
||||||
|
'entries': [{
|
||||||
|
'_type': 'url',
|
||||||
|
'url': eurl,
|
||||||
|
'ie_key': 'XTube',
|
||||||
|
} for eurl in urls]
|
||||||
|
}
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@ -17,24 +20,25 @@ from ..aes import (
|
|||||||
|
|
||||||
|
|
||||||
class YouPornIE(InfoExtractor):
|
class YouPornIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
_VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||||
u'file': u'505835.mp4',
|
'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
|
||||||
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '505835',
|
||||||
u"upload_date": u"20101221",
|
'ext': 'mp4',
|
||||||
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
'upload_date': '20101221',
|
||||||
u"uploader": u"Ask Dan And Jennifer",
|
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||||
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
|
'uploader': 'Ask Dan And Jennifer',
|
||||||
u"age_limit": 18,
|
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
url = 'http://www.' + mobj.group('url')
|
url = mobj.group('proto') + 'www.' + mobj.group('url')
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
# Get JSON parameters
|
# Get JSON parameters
|
||||||
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
|
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
|
||||||
try:
|
try:
|
||||||
params = json.loads(json_params)
|
params = json.loads(json_params)
|
||||||
except:
|
except:
|
||||||
@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
# Get all of the links from the page
|
# Get all of the links from the page
|
||||||
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||||
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
||||||
webpage, u'download list').strip()
|
webpage, 'download list').strip()
|
||||||
LINK_RE = r'<a href="([^"]+)">'
|
LINK_RE = r'<a href="([^"]+)">'
|
||||||
links = re.findall(LINK_RE, download_list_html)
|
links = re.findall(LINK_RE, download_list_html)
|
||||||
|
|
||||||
@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
resolution = format_parts[0]
|
resolution = format_parts[0]
|
||||||
height = int(resolution[:-len('p')])
|
height = int(resolution[:-len('p')])
|
||||||
bitrate = int(format_parts[1][:-len('k')])
|
bitrate = int(format_parts[1][:-len('k')])
|
||||||
format = u'-'.join(format_parts) + u'-' + dn
|
format = '-'.join(format_parts) + '-' + dn
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -176,32 +176,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
# 3d videos
|
# 3d videos
|
||||||
'82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
|
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
|
||||||
'83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
|
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
|
||||||
'84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
|
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
|
||||||
'85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
|
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
|
||||||
'100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
|
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
|
||||||
'101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
|
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
|
||||||
'102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
|
||||||
|
|
||||||
# Apple HTTP Live Streaming
|
# Apple HTTP Live Streaming
|
||||||
'92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
||||||
'93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
|
||||||
'94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
|
||||||
'95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
|
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
|
||||||
'96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
|
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
|
||||||
'132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
|
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
||||||
'151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
|
||||||
|
|
||||||
# DASH mp4 video
|
# DASH mp4 video
|
||||||
'133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
|
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
|
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
|
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
|
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
|
'138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
|
|
||||||
# Dash mp4 audio
|
# Dash mp4 audio
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
||||||
@ -209,19 +209,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||||
'242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
|
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
|
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
|
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
'248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
|
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
|
||||||
|
|
||||||
# Dash webm audio
|
# Dash webm audio
|
||||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
|
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
|
||||||
@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
proto = (
|
||||||
|
u'http' if self._downloader.params.get('prefer_insecure', False)
|
||||||
|
else u'https')
|
||||||
|
|
||||||
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
|
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
|
||||||
mobj = re.search(self._NEXT_URL_RE, url)
|
mobj = re.search(self._NEXT_URL_RE, url)
|
||||||
if mobj:
|
if mobj:
|
||||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||||
video_id = self.extract_id(url)
|
video_id = self.extract_id(url)
|
||||||
|
|
||||||
# Get video webpage
|
# Get video webpage
|
||||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||||
video_webpage = self._download_webpage(url, video_id)
|
video_webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# Attempt to extract SWF player URL
|
# Attempt to extract SWF player URL
|
||||||
@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'asv': 3,
|
'asv': 3,
|
||||||
'sts':'1588',
|
'sts':'1588',
|
||||||
})
|
})
|
||||||
video_info_url = 'https://www.youtube.com/get_video_info?' + data
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||||
note=False,
|
note=False,
|
||||||
errnote='unable to download video info webpage')
|
errnote='unable to download video info webpage')
|
||||||
@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
age_gate = False
|
age_gate = False
|
||||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||||
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||||
% (video_id, el_type))
|
% (video_id, el_type))
|
||||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||||
note=False,
|
note=False,
|
||||||
@ -1285,10 +1289,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
# Decide which formats to download
|
# Decide which formats to download
|
||||||
try:
|
try:
|
||||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ValueError('Could not find vevo ID')
|
raise ValueError('Could not find vevo ID')
|
||||||
ytplayer_config = json.loads(mobj.group(1))
|
json_code = uppercase_escape(mobj.group(1))
|
||||||
|
ytplayer_config = json.loads(json_code)
|
||||||
args = ytplayer_config['args']
|
args = ytplayer_config['args']
|
||||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||||
# this signatures are encrypted
|
# this signatures are encrypted
|
||||||
@ -1444,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'duration': video_duration,
|
'duration': video_duration,
|
||||||
'age_limit': 18 if age_gate else 0,
|
'age_limit': 18 if age_gate else 0,
|
||||||
'annotations': video_annotations,
|
'annotations': video_annotations,
|
||||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'dislike_count': dislike_count,
|
'dislike_count': dislike_count,
|
||||||
@ -1645,7 +1650,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
||||||
_GDATA_PAGE_SIZE = 50
|
_GDATA_PAGE_SIZE = 50
|
||||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||||
@ -1744,12 +1749,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
for video_id in video_ids]
|
for video_id in video_ids]
|
||||||
return self.playlist_result(videos, query)
|
return self.playlist_result(videos, query)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|
||||||
_SEARCH_KEY = 'ytsearchdate'
|
_SEARCH_KEY = 'ytsearchdate'
|
||||||
IE_DESC = u'YouTube.com searches, newest videos first'
|
IE_DESC = u'YouTube.com searches, newest videos first'
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeSearchURLIE(InfoExtractor):
|
||||||
|
IE_DESC = u'YouTube.com search URLs'
|
||||||
|
IE_NAME = u'youtube:search_url'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
query = compat_urllib_parse.unquote_plus(mobj.group('query'))
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, query)
|
||||||
|
result_code = self._search_regex(
|
||||||
|
r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
|
||||||
|
|
||||||
|
part_codes = re.findall(
|
||||||
|
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
|
||||||
|
entries = []
|
||||||
|
for part_code in part_codes:
|
||||||
|
part_title = self._html_search_regex(
|
||||||
|
r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
|
||||||
|
part_url_snippet = self._html_search_regex(
|
||||||
|
r'(?s)href="([^"]+)"', part_code, 'item URL')
|
||||||
|
part_url = compat_urlparse.urljoin(
|
||||||
|
'https://www.youtube.com/', part_url_snippet)
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': part_url,
|
||||||
|
'title': part_title,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'title': query,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubeShowIE(InfoExtractor):
|
class YoutubeShowIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com (multi-season) shows'
|
IE_DESC = u'YouTube.com (multi-season) shows'
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import calendar
|
||||||
import contextlib
|
import contextlib
|
||||||
import ctypes
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
import errno
|
import errno
|
||||||
|
import getpass
|
||||||
import gzip
|
import gzip
|
||||||
import itertools
|
import itertools
|
||||||
import io
|
import io
|
||||||
@ -22,6 +24,7 @@ import struct
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -499,13 +502,13 @@ def orderedSet(iterable):
|
|||||||
res.append(el)
|
res.append(el)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def unescapeHTML(s):
|
|
||||||
"""
|
|
||||||
@param s a string
|
|
||||||
"""
|
|
||||||
assert type(s) == type(u'')
|
|
||||||
|
|
||||||
result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
|
def unescapeHTML(s):
|
||||||
|
if s is None:
|
||||||
|
return None
|
||||||
|
assert type(s) == compat_str
|
||||||
|
|
||||||
|
result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@ -759,8 +762,37 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
|
def parse_iso8601(date_str):
|
||||||
|
""" Return a UNIX timestamp from the given date """
|
||||||
|
|
||||||
|
if date_str is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
m = re.search(
|
||||||
|
r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
|
||||||
|
date_str)
|
||||||
|
if not m:
|
||||||
|
timezone = datetime.timedelta()
|
||||||
|
else:
|
||||||
|
date_str = date_str[:-len(m.group(0))]
|
||||||
|
if not m.group('sign'):
|
||||||
|
timezone = datetime.timedelta()
|
||||||
|
else:
|
||||||
|
sign = 1 if m.group('sign') == '+' else -1
|
||||||
|
timezone = datetime.timedelta(
|
||||||
|
hours=sign * int(m.group('hours')),
|
||||||
|
minutes=sign * int(m.group('minutes')))
|
||||||
|
|
||||||
|
dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
|
||||||
|
return calendar.timegm(dt.timetuple())
|
||||||
|
|
||||||
|
|
||||||
def unified_strdate(date_str):
|
def unified_strdate(date_str):
|
||||||
"""Return a string with the date in the format YYYYMMDD"""
|
"""Return a string with the date in the format YYYYMMDD"""
|
||||||
|
|
||||||
|
if date_str is None:
|
||||||
|
return None
|
||||||
|
|
||||||
upload_date = None
|
upload_date = None
|
||||||
#Replace commas
|
#Replace commas
|
||||||
date_str = date_str.replace(',', ' ')
|
date_str = date_str.replace(',', ' ')
|
||||||
@ -777,6 +809,7 @@ def unified_strdate(date_str):
|
|||||||
'%Y/%m/%d %H:%M:%S',
|
'%Y/%m/%d %H:%M:%S',
|
||||||
'%Y-%m-%d %H:%M:%S',
|
'%Y-%m-%d %H:%M:%S',
|
||||||
'%d.%m.%Y %H:%M',
|
'%d.%m.%Y %H:%M',
|
||||||
|
'%d.%m.%Y %H.%M',
|
||||||
'%Y-%m-%dT%H:%M:%SZ',
|
'%Y-%m-%dT%H:%M:%SZ',
|
||||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||||
@ -1119,11 +1152,11 @@ def setproctitle(title):
|
|||||||
libc = ctypes.cdll.LoadLibrary("libc.so.6")
|
libc = ctypes.cdll.LoadLibrary("libc.so.6")
|
||||||
except OSError:
|
except OSError:
|
||||||
return
|
return
|
||||||
title = title
|
title_bytes = title.encode('utf-8')
|
||||||
buf = ctypes.create_string_buffer(len(title) + 1)
|
buf = ctypes.create_string_buffer(len(title_bytes))
|
||||||
buf.value = title.encode('utf-8')
|
buf.value = title_bytes
|
||||||
try:
|
try:
|
||||||
libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
|
libc.prctl(15, buf, 0, 0, 0)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return # Strange libc, just skip this
|
return # Strange libc, just skip this
|
||||||
|
|
||||||
@ -1263,3 +1296,35 @@ def read_batch_urls(batch_fd):
|
|||||||
|
|
||||||
with contextlib.closing(batch_fd) as fd:
|
with contextlib.closing(batch_fd) as fd:
|
||||||
return [url for url in map(fixup, fd) if url]
|
return [url for url in map(fixup, fd) if url]
|
||||||
|
|
||||||
|
|
||||||
|
def urlencode_postdata(*args, **kargs):
|
||||||
|
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_xml(s):
|
||||||
|
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
||||||
|
def doctype(self, name, pubid, system):
|
||||||
|
pass # Ignore doctypes
|
||||||
|
|
||||||
|
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
||||||
|
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
||||||
|
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
|
def compat_getpass(prompt, *args, **kwargs):
|
||||||
|
if isinstance(prompt, compat_str):
|
||||||
|
prompt = prompt.encode(preferredencoding())
|
||||||
|
return getpass.getpass(prompt, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
compat_getpass = getpass.getpass
|
||||||
|
|
||||||
|
|
||||||
|
US_RATINGS = {
|
||||||
|
'G': 0,
|
||||||
|
'PG': 10,
|
||||||
|
'PG-13': 13,
|
||||||
|
'R': 16,
|
||||||
|
'NC': 18,
|
||||||
|
}
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.27.1'
|
__version__ = '2013.03.24.2'
|
||||||
|
Reference in New Issue
Block a user