#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# cvh_video.py
#
# Copyright 2010 Javier Rovegno Campos <tatadeluxe<at>gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
"""
Script para descargar videos desde http://www.chilevision.cl/
Requiere:
- aria2c - wget
Extras:
Ofrece Descargar el resto de los videos
Uso:
# Comillas requeridas, problema parser
cvh_video.py "http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=YYYYY&Itemid=XXX"
"""
import commands
import getopt
import os
import re
import signal
import subprocess
import sys
import urllib
def main():
    """Parse the command line and process every positional argument.

    Recognised options:
        -h, --help   print the module docstring and exit with status 0.

    An option that getopt cannot parse prints the getopt message plus a
    help hint and exits with status 2.  Each remaining argument is handed
    to process() in the order given; with no arguments nothing is done.
    """
    # Parse command line options.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    # Process options (none of them takes a value).
    for option, _value in opts:
        if option in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)
    # Process arguments.
    for arg in args:
        process(arg)
def _run_downloader(argv):
    """Run an external downloader; return its exit code, or None if it could not be started."""
    try:
        # An argument list (no shell) keeps characters such as '&', '"' or
        # ';' in the scraped URL from being interpreted by a shell.
        return subprocess.call(argv)
    except OSError:
        # Raised when the executable cannot be launched (e.g. not installed).
        return None


def process(arg):
    """Download the video referenced by the page at URL ``arg``.

    The video URL is obtained with extrae_url_file().  aria2c is tried
    first; on success the URL is appended to the log and the user is
    offered the download of the similar videos.  If aria2c cannot be
    started, wget is used instead and the URL is logged only when wget
    reports success.  Any other outcome prints a failure message.

    Raises:
        ValueError: propagated from extrae_url_file() when the page
            contains no video.

    >>> process('http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=YYYYY&Itemid=XXX')
    Traceback (most recent call last):
        ...
    ValueError: Descarga video fallida, url no contiene video
    """
    url_file = extrae_url_file(arg)
    status = _run_downloader(['aria2c', url_file])
    if status == 0:
        print('Descarga video realizada con éxito!')
        log_file(url_file)
        if pregunta_download_more():
            download_more(url_file)
        else:
            print('La opción que elige es no descargar videos similares')
    elif status is None:
        # aria2c is not available: fall back to wget.
        print('Recomendamos que instale aria2c para acelerar la descarga')
        if _run_downloader(['wget', url_file]) == 0:
            log_file(url_file)
        else:
            print('Descarga video fallida!')
    else:
        print('Descarga video fallida!')
def pregunta_download_more():
    """Ask whether the remaining similar videos should be downloaded.

    The prompt is shown through raw_input_timer() with a 5 second limit.

    Returns:
        True only when the answer is "s" (case-insensitive, surrounding
        whitespace and line endings ignored); False for any other answer,
        including the empty string.
    """
    answer = raw_input_timer(
        'Descargar el resto de los videos similares s/(n): ', 5)
    # Normalise so that "S", "s " or "s\r\n" count as yes too.
    return answer.strip().lower() == 's'
def download_more(url_file):
    """Download the sibling videos of ``url_file`` with a single aria2c run.

    Candidate URLs are built by replacing the two characters that precede
    the last four characters of ``url_file`` with "01" .. "49".
    NOTE(review): this assumes the URL ends in a two-digit part number
    followed by a dot and a three-letter extension -- confirm against the
    URLs the site really serves.

    Every candidate accepted by verifica_url_file() is logged and queued.
    ``url_file`` itself is skipped, so the video the caller already has is
    neither logged nor downloaded a second time.  Probing tolerates four
    rejected candidates in total and stops at the fifth.

    Args:
        url_file: URL of a video of the series.
    """
    ext = url_file[-4:]
    pref = url_file[:-6]
    misses = 0
    found = []
    for number in ['%02d' % i for i in range(1, 50)]:
        candidate = '%s%s%s' % (pref, number, ext)
        if candidate == url_file:
            continue
        if verifica_url_file(candidate):
            log_file(candidate)
            found.append(candidate)
        elif misses <= 3:
            misses += 1
        else:
            break
    if not found:
        print("No hay más videos similares para descargar")
        return
    try:
        # -Z makes aria2c treat every URL as a separate download.  An
        # argument list (no shell) keeps '&' and similar characters in the
        # URLs from being interpreted by a shell.
        subprocess.call(['aria2c', '-Z'] + found)
    except OSError:
        # aria2c could not be started.
        print('Descarga video fallida!')
def verifica_url_file(url_file_aux):
    """Tell whether ``url_file_aux`` is served as a downloadable file.

    Args:
        url_file_aux: URL to probe.

    Returns:
        True when the response's raw Content-Type header is exactly
        'application/octet-stream'; False otherwise, including when the
        URL cannot be opened.
    """
    try:
        sock = urllib.urlopen(url_file_aux)
    except IOError:
        # Unreachable candidate: report it as "no video" instead of
        # aborting the caller's probing loop.
        return False
    try:
        return sock.info().typeheader == 'application/octet-stream'
    finally:
        # Only the headers are needed; always release the connection.
        sock.close()
def extrae_url_file(url):
    """Return the video file URL embedded in the page at ``url``.

    The page is searched for ``playerCHV(``; the video URL is the text
    from 11 characters after the start of that marker up to the next
    single quote, with every '%2F' replaced by '/'.

    When the marker is absent the page is treated as an index page: every
    ``<digits>&Itemid=2389`` occurrence (1 to 8 digits) is turned into a
    quoted video-page link, the links are printed on one line and the
    program exits with status 0.

    Args:
        url: address of a video page or of an index page.

    Raises:
        ValueError: the page has neither the player marker nor any
            video-page link, or the marker is not followed by a single
            quote.
    """
    sock = urllib.urlopen(url)
    try:
        html_source = sock.read()
    finally:
        sock.close()

    marker = html_source.find('playerCHV(')
    if marker == -1:
        # Not a video page: look for links to video pages instead.
        video_ids = re.findall(r"(\d{1,8})&Itemid=2389", html_source)
        if not video_ids:
            raise ValueError("Descarga video fallida, url no contiene video")
        print(' '.join(
            '"http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=%s&Itemid=2389"'
            % video_id for video_id in video_ids))
        sys.exit(0)

    # 11 = len('playerCHV(') + 1; the extra character is presumably the
    # opening quote of the URL argument -- TODO confirm against a live page.
    video_url_init = marker + 11
    video_url_end = html_source.index('\'', video_url_init)
    return html_source[video_url_init:video_url_end].replace('%2F', '/')
def log_file(url_file):
    """Append ``url_file`` as one line to log_chv_video.txt.

    The log lives in the current working directory and is created when it
    does not exist yet.

    Args:
        url_file: URL of the video to record.
    """
    # The with-block closes the file even if the write fails.
    with open('log_chv_video.txt', 'a') as log:
        log.write("%s\n" % url_file)
def alarm_handler(*args):
    """Signal handler that interrupts the running code with Exception("timeout").

    Whatever positional arguments it is called with are accepted and
    ignored.
    """
    timeout_error = Exception("timeout")
    raise timeout_error
def raw_input_timer(prompt, timeout):
    """Show ``prompt`` and read one line from stdin, giving up after ``timeout``.

    A SIGALRM is scheduled with alarm_handler installed as its handler, so
    a read that is still blocked when the alarm fires is interrupted.
    SIGALRM and signal.alarm() are documented by Python as Unix-only.

    Args:
        prompt: text written to stdout before reading.
        timeout: number of whole seconds to wait, as taken by
            signal.alarm().

    Returns:
        The line read, including its trailing newline, or "" when the
        read timed out or failed.
    """
    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(timeout)
    try:
        sys.stdout.write(prompt)
        sys.stdout.flush()
        try:
            text = sys.stdin.readline()
        except Exception:
            # Timeout raised by the handler, or a failed read.  Unlike a
            # bare except this lets KeyboardInterrupt and SystemExit
            # propagate.
            text = ""
    finally:
        # Cancel any pending alarm and put the earlier handler back, on
        # every exit path.
        signal.alarm(0)
        if previous_handler is not None:
            # None means the earlier handler was not installed from
            # Python and cannot be passed back to signal.signal().
            signal.signal(signal.SIGALRM, previous_handler)
    return text
if __name__ == '__main__':
    # Script entry point: run the module's doctests, then the command line.
    # NOTE(review): the doctest in process() calls process() on a
    # placeholder URL, which reaches urllib.urlopen() through
    # extrae_url_file(); every normal run therefore appears to start with
    # that extra request -- consider moving the doctests behind a flag.
    import doctest
    doctest.testmod()
    main()