A fast multi-part file downloader à la FlashGet, GetRight, Gozilla, etc. It currently supports only HTTP, but adding FTP wouldn't be hard. Use this instead of wget. ;-)
# mget.py
#
# by Nelson Rush
#
# MIT/X license.
#
# A simple program to download files in segments.
#
# - Fixes by David Loaiza for Python 2.5 added.
# - Nelson added fixes to bring the code to 2.7 and add portability between Windows/Linux.
# - The output of segment information has been corrected and cleaned up.
# - In outside cases where the client instances were not being closed, they are now closed.
#
import sys
import os
import asyncore
import socket
import platform
from string import *
from math import *
from time import *
from mmap import *
# Operating-system name as reported by platform.system(); http_client.__init__
# compares it with "Windows" to decide which length argument to pass to mmap.
platformName = platform.system()
# Seek-origin style constants. They are not referenced by any code visible in
# this file.
# NOTE(review): these values do not match os.SEEK_SET (0), os.SEEK_CUR (1) and
# os.SEEK_END (2) -- confirm before ever passing them to file.seek().
SEEK_BEG = 0
SEEK_SET = 1
SEEK_END = 2
class http_client(asyncore.dispatcher):
    """Asynchronous HTTP client that downloads one byte range of a file.

    A client created with ``parts > 0`` is the *parent*: it creates the
    output file, memory-maps it, sends a HEAD request to learn the file size,
    then splits the file into segments.  It downloads the first segment
    itself and creates one child client (``parts == 0``) for every other
    segment.  All clients write into the same mmap object.

    ``recvhead`` tracks the protocol stage:
        1 -- waiting for the response to the HEAD request (parent only)
        2 -- waiting for the response header of the GET request
        0 -- receiving body bytes

    ``pbegin`` / ``pend`` are the first and last byte offsets of the segment
    (both inclusive, exactly as they appear in the HTTP Range header).
    ``bytes`` is the offset at which the next received byte is written and
    ``length`` is the offset one past the end of the segment.

    NOTE: the status line of a ranged GET is not checked, so a server that
    ignores the Range header is not detected; received data is only clamped
    so that a client never writes outside its own segment.
    """

    def __init__(self, host, path, parts, pbegin=0, pend=0, m=None):
        """Connect to *host* on port 80 and queue the first request.

        host   -- server name, also sent as the Host header.
        path   -- request path; an empty path is requested as "/".
        parts  -- number of segments to split the download into; 0 marks a
                  child that downloads only the range pbegin..pend.
        pbegin -- first byte offset of a child's segment (inclusive).
        pend   -- last byte offset of a child's segment (inclusive).
        m      -- mmap object shared by the parent (children only).

        If the target file already exists and the user declines to overwrite
        it, the constructor returns without creating a socket; ``f`` and
        ``m`` are then both None.
        """
        asyncore.dispatcher.__init__(self)
        # Initialize class member variables.
        self.keepalive = False
        self.done = 0
        self.h = [self]
        self.recvhead = 1
        self.bytes = 0
        self.ack = 0
        self.begin = time()
        self.path = path or "/"
        self.parts = parts
        self.host = host
        self.buffer = ""
        # Response-header bytes collected so far (a header may span reads).
        self.head = ""
        self.pbegin = pbegin
        self.pend = pend
        self.length = 8192
        self.f = None
        # Defined up front so the attribute exists even when we abort below.
        self.m = m
        # Grab the filename from the end of the URL; fall back to a fixed
        # name when the path ends in "/" and therefore names no file.
        self.filename = self.path.split("/")[-1] or "index.html"
        # Check if the file exists and if so ask whether to overwrite it.
        # os.F_OK is the existence test; the open flag os.O_RDWR used
        # previously is not a valid mode for os.access().
        if self.parts > 0 and os.access(self.filename, os.F_OK):
            answer = raw_input("File already exists, overwrite? [y/N] ")
            if answer in ("y", "Y"):
                print("Overwriting...")
            else:
                print("Aborting...")
                return
        # Connect to the host on port 80.
        print("Connecting...")
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.connect((host, 80))
        if self.parts > 0:
            # We are the parent: open and memory map the file.
            self.f = open(self.filename, 'wb+')
            self.f.write("\0")
            # Flush so the file really has one byte before it is mapped.
            self.f.flush()
            # Windows uses 0 for the second parameter to auto-size to the
            # current file size, whereas other platforms get an explicit size.
            if platformName == "Windows":
                self.m = mmap(self.f.fileno(), 0)
            else:
                self.m = mmap(self.f.fileno(),
                              os.fstat(self.f.fileno()).st_size)
            # Ask for the header only, to learn the file size.
            self.buffer = "HEAD %s HTTP/1.1\r\nHost: %s\r\n\r\n" % (
                self.path, self.host)
            print("Downloading http://%s%s" % (self.host, self.path))
        elif self.parts == 0:
            # We are a child: skip the HEAD stage and fetch our segment.
            self.bytes = self.pbegin
            # pend is inclusive, so the segment stops one byte after it.
            self.length = self.pend + 1
            self.recvhead = 2
            self.buffer = ("GET %s HTTP/1.1\r\nHost: %s\r\n"
                           "Range: bytes=%d-%d\r\n\r\n" % (
                               self.path, self.host, self.pbegin, self.pend))
            print(self.buffer)

    @staticmethod
    def _segment_ranges(length, parts):
        """Split *length* bytes into at most *parts* inclusive byte ranges.

        Returns a list of (first, last) offset pairs that cover
        0..length-1 exactly once.  No empty range is produced, so fewer than
        *parts* pairs are returned when the file is shorter than *parts*
        bytes, and an empty list when *length* is not positive.
        """
        if length <= 0:
            return []
        count = max(1, min(parts, length))
        bounds = [i * length // count for i in range(count + 1)]
        return [(bounds[i], bounds[i + 1] - 1) for i in range(count)]

    @staticmethod
    def _content_length(header):
        """Return the Content-Length value of *header*, or None.

        The field name is matched case-insensitively.  None is returned when
        the field is missing, is not an integer, or is negative.
        """
        for line in header.split("\r\n")[1:]:
            name, sep, value = line.partition(":")
            if sep and name.strip().lower() == "content-length":
                try:
                    size = int(value.strip())
                except ValueError:
                    return None
                if size < 0:
                    return None
                return size
        return None

    def _abort(self, message):
        """Print *message* and close the socket, the mmap and the file."""
        print(message)
        self.close()
        self.m.close()
        self.f.close()

    def _process_head(self, header):
        """Handle the HEAD response: size the file and start the segments."""
        print(header)
        # If the file was not found, exit.
        if "404 Not Found" in header:
            self._abort("404 Not Found")
            return
        # Anything other than "302 Found" or "200 OK" stops the download.
        # NOTE(review): a 302 response is accepted but the redirect is not
        # followed -- confirm whether that is intended.
        if "302 Found" not in header and "200 OK" not in header:
            self._abort("Unable to continue download.")
            return
        total = self._content_length(header)
        if total is None:
            self._abort("Cannot determine size.")
            return
        if total == 0:
            # Nothing to download: drop the placeholder byte and stop.
            self.close()
            self.m.close()
            self.f.truncate(0)
            return
        self.m.resize(total)
        self.recvhead = 2
        segments = self._segment_ranges(total, self.parts)
        self.pbegin, self.pend = segments[0]
        self.length = self.pend + 1
        if len(segments) == 1:
            # A single segment: just get the whole file.
            self.buffer = "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n" % (
                self.path, self.host)
        else:
            print("Segment size = %d" % (total // len(segments)))
            # Download the other segments in separate instances.
            for first, last in segments[1:]:
                self.h.append(http_client(self.host, self.path, 0,
                                          first, last, self.m))
            # The parent itself downloads the first segment.
            self.buffer = ("GET %s HTTP/1.1\r\nHost: %s\r\n"
                           "Range: bytes=%d-%d\r\n\r\n" % (
                               self.path, self.host, self.pbegin, self.pend))
        print(self.buffer)

    def _store(self, data):
        """Write body bytes at the current offset and report progress."""
        # Never write past the end of our own segment.
        chunk = data[:max(self.length - self.bytes, 0)]
        size = len(chunk)
        if size > 0:
            start = self.bytes
            self.m[start:start + size] = chunk
            self.bytes += size
            # Inform the user once at least 1k has arrived since last time.
            self.ack += size
            if self.ack >= 1024:
                print("Segment %7lu-%7lu\t\t%7lu to %7lu bytes recieved" % (
                    self.pbegin, self.pend, start, self.bytes - 1))
                self.ack %= 1024
        if self.bytes >= self.length:
            self.complete = time()
            self.close()

    def handle_connect(self):
        """Nothing to do on connect; the request is sent by handle_write."""
        pass

    def handle_read(self):
        """Receive data and feed it to the current protocol stage."""
        data = self.recv(8192)
        if not data:
            return
        if self.recvhead == 0:
            # Just download the rest of the segment.
            self._store(data)
            return
        # Stage 1 or 2: collect bytes until the blank line that ends the
        # header has arrived; it may take more than one read.
        self.head += data
        end = self.head.find("\r\n\r\n")
        if end == -1:
            return
        header = self.head[:end + 4]
        body = self.head[end + 4:]
        self.head = ""
        if self.recvhead == 1:
            self._process_head(header)
        else:
            # The GET header is finished; everything after it is file data.
            self.recvhead = 0
            if body:
                self._store(body)

    def writable(self):
        """Return True while request bytes are still waiting to be sent."""
        return len(self.buffer) > 0

    def handle_write(self):
        """Send as much of the pending request as the socket accepts."""
        sent = self.send(self.buffer)
        self.buffer = self.buffer[sent:]

    def handle_close(self):
        """Report the final position when the peer closes the connection."""
        self.complete = time()
        print("Segment %7lu-%7lu\t\t%7lu to %7lu bytes recieved" % (
            self.pbegin, self.pend, self.bytes, self.length))
        self.close()
# Main
if __name__ == '__main__':
    # Script entry point: download the URL given as the first argument in
    # three segments.
    from urlparse import urlparse
    if len(sys.argv) < 2:
        print('usage: %s host' % sys.argv[0])
    else:
        url = sys.argv[1]
        # Only a leading scheme counts; "http://" may legitimately appear
        # later in the string (for example inside a query parameter).
        if not url.startswith("http://"):
            url = "http://" + url
        url = urlparse(url)
        # url[1] is the network location, url[2] the path.  A URL without a
        # path is requested as "/" so the request line stays valid.
        client = http_client(url[1], url[2] or "/", 3)
        try:
            asyncore.loop()
        finally:
            # The client has no mmap or file when the user declined to
            # overwrite an existing file, so guard both before closing.
            mapped = getattr(client, "m", None)
            if mapped is not None:
                mapped.close()
            output = getattr(client, "f", None)
            if output is not None:
                output.close()
        print("Client download finished.")
|
Handy for downloading files. Could be extended to support new features: a user-friendly GUI, helper segments (which help out other segments once they have finished their own), password support, archival and tracking of downloads in progress or already downloaded, etc. Use your imagination. The first version I made of this used regular old files, but I decided to go with mmap'd files instead because they're more enjoyable to work with.
This has been updated for Python 2.7.
Can this be done in an ActiveX control? Hi,
I am not familiar with Python, but I want to build an ActiveX control that IE can use to speed up the download of a resource, e.g. a Flash movie. If you can guide me on how to write it as an ActiveX control, that would be great; if not, I hope you can explain the concept behind this in as much detail as possible. I am sorry to bug you here, but I have been searching high and low for something that can guide me, and yours is the only one I have found so far.
I would really appreciate your help and guidance if you can generously help me. Thanks in advance and I really mean it.
from, Steven
Help. Hi there, Mr. Steven and Mr. Nelson. I am Santosh M. Pillai, a software engineer from India. My problem is similar to the one described by Mr. Steven. I too want to build a Win32 application, but I am not familiar with Python, so I am unable to understand the logic and convert the Python code into a Win32 application. I also searched the Web for information on file segmentation and multiple-connection downloads, but couldn't find any details of the logic. If you have any information (e.g. a URL or documentation) on these topics, please help me. I hope you will be able to help solve this problem. Thanks in advance. Expecting a reply soon. [ pillai_santosh@indiatimes.com ] Thank you.
For Linux and Python 2.5. This is the code that works on Linux ...
<pre> import sys import os import asyncore import socket from string import * from math import * from time import * from mmap import *
SEEK_BEG = 0 SEEK_SET = 1 SEEK_END = 2
class http_client(asyncore.dispatcher): def __init__ (self,host,path,parts,pbegin=0,pend=0,m=None): asyncore.dispatcher.__init__(self) # Initialize class member variables. self.done = 0 self.h = [self] self.recvhead = 1 self.bytes = 0 self.ack = 0 self.begin = time() self.path = path self.parts = parts self.host = host self.buffer = "" self.pbegin = pbegin self.pend = pend self.length = 8192 self.f = None # Grab the filename from the end of the URL. self.filename = split(path,"/")[-1] # Check if file exists and if so ask if overwrite necessary. if os.access(self.filename,os.O_RDWR) and self.parts > 0: u = raw_input("File already exists, overwrite? [y/N] ") if u == 'y' or u == 'Y': print "Overwriting..." else: print "Aborting..." return None # Create a TCP/IP socket and connect to the host with it on port 80. print "Connecting..." self.create_socket(socket.AF_INET, socket.SOCK_STREAM) self.connect((host, 80)) # Parts are greater than 0 so we are the parent, open file, get header. if self.parts > 0: # Open and memory map the file. self.f = open(self.filename,'wb+') self.f.write("\0") self.f.tell() self.m = mmap(self.f.fileno(),os.fstat(self.f.fileno()).st_size) # Download the header. self.buffer = "HEAD %s HTTP/1.1\r\nHost: %s\r\n\r\n" %(self.path,self.host) print "Downloading http://%s%s" % (self.host,self.path) # Otherwise, we are a child, skip the header and download our segment. elif self.parts == 0: # Set our own mmap to the one given to us by the parent. self.m = m # Prepare ourselves to download the segment. self.bytes = self.pbegin self.length = self.pend self.recvhead = 2 self.buffer = "GET %s HTTP/1.1\r\nHost: %s\r\nRange: bytes=%lu-%lu\r\n\r\n" % (self.path,self.host,self.pbegin,self.pend) print self.buffer def handle_connect(self): pass def handle_read(self): # Recieve incoming data. data = self.recv(8192) # Handle recieving the header, stage 1. if self.recvhead == 1: self.head = data print self.head # If the file was not found, exit. 
if find(data,"404 Not Found") > -1: print "404 Not Found"
(comment continued...)
(...continued from previous comment)
(comment continued...)
(...continued from previous comment)
Main
if __name__ == '__main__': from urlparse import * if len(sys.argv) This is the the code for work on linux ...
<pre> import sys import os import asyncore import socket from string import * from math import * from time import * from mmap import *
SEEK_BEG = 0 SEEK_SET = 1 SEEK_END = 2
class http_client(asyncore.dispatcher): def __init__ (self,host,path,parts,pbegin=0,pend=0,m=None): asyncore.dispatcher.__init__(self) # Initialize class member variables
atoi() is deprecated. You should use int() instead.
How can I download files from servers that make me wait a few seconds before the download starts?