#!/usr/bin/python
#coding=gbk
import re
import os
import sys
import time
import glob
import string
import socket
import getopt
import urllib
import urllib2
import threading
from sgmllib import SGMLParser
from optparse import OptionParser
# #############################################################################
# #
# # self-defined exception classes
# #
# #############################################################################
class ConnectionError(Exception):
    """Connection-level failure.

    NOTE(review): not raised anywhere in the visible code; presumably
    reserved for callers of this module -- confirm before relying on it.
    """


class URLUnreachable(Exception):
    """The requested URL could not be used as a download source."""


class CanotDownload(Exception):
    """A download could not be carried out.

    The misspelled name is kept as-is because it is part of the module's
    public interface.
    """
# #############################################################################
# #
# # multiple threads download module starts here
# #
# #############################################################################
class HttpGetThread(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a part file.

    The part file is opened in append mode: whatever is already on disk is
    treated as the beginning of the range, and only the remaining bytes are
    requested from the server. This is what makes an interrupted download
    resumable.

    Attributes:
        url: address to download from.
        filename: path of the part file this worker appends to.
        range: inclusive ``(first_byte, last_byte)`` pair to fetch.
        totalLength: number of bytes covered by ``range``.
        downloaded: size of the part file, i.e. bytes fetched so far.
        percent: ``downloaded`` as a percentage of ``totalLength``.
        headerrange: ``(start, end)`` offsets sent in the Range header.
        bufferSize: maximum number of bytes asked for per ``read`` call.
        time: whole seconds elapsed in the most recent request.
        error: last exception that stopped a request, or ``None`` when the
            range was fetched completely.
    """

    # Number of requests issued before the worker gives up on its range.
    MAX_ATTEMPTS = 10
    # Seconds to wait before trying again after a failed request.
    RETRY_DELAY = 1

    def __init__(self, name, url, filename, range=0):
        """Prepare a worker for ``range`` of ``url``; nothing is fetched yet.

        Args:
            name: thread name, passed on to ``threading.Thread``.
            url: address to download from.
            filename: part file to append to (it may already exist).
            range: inclusive ``(first_byte, last_byte)`` pair. The default
                of 0 is kept only for signature compatibility; it does not
                describe a range and is rejected.

        Raises:
            TypeError: ``range`` is not an indexable pair.
        """
        threading.Thread.__init__(self, name=name)
        try:
            first, last = range[0], range[1]
        except TypeError:
            raise TypeError(
                "range must be a (first_byte, last_byte) pair, got %r"
                % (range,))
        self.url = url
        self.filename = filename
        self.range = range
        self.totalLength = last - first + 1
        self.bufferSize = 8192
        self.time = 0
        self.error = None
        self._sync_with_part_file()

    def _sync_with_part_file(self):
        """Recompute progress fields from the part file's size on disk."""
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0
        self._update_percent()
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])

    def _update_percent(self):
        """Refresh ``percent``; an empty range counts as fully done."""
        if self.totalLength > 0:
            self.percent = self.downloaded / float(self.totalLength) * 100
        else:
            self.percent = 100.0

    def _fetch_remaining(self):
        """Issue one range request and append the received bytes to the part file.

        Raises whatever ``urllib2`` or the file system raises, and
        ``IOError`` when the server ignores the Range header on a resumed
        request.
        """
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        request = urllib2.Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
        conn = urllib2.urlopen(request)
        try:
            # A non-206 reply to a request that does not start at byte 0
            # would deliver the wrong bytes; refuse it instead of
            # appending them to the part file.
            status = conn.getcode()
            if self.headerrange[0] > 0 and status != 206:
                raise IOError(
                    "server ignored the Range header (HTTP %s)" % (status,))
            startTime = time.time()
            with open(self.filename, 'ab') as part:
                while self.downloaded < self.totalLength:
                    # Never read past the end of this worker's range, even
                    # if the server sends more than was asked for.
                    remaining = self.totalLength - self.downloaded
                    data = conn.read(min(self.bufferSize, remaining))
                    if not data:
                        break
                    part.write(data)
                    self.time = int(time.time() - startTime)
                    self.downloaded += len(data)
                    self._update_percent()
        finally:
            conn.close()

    def run(self):
        """Fetch the missing part of the range, retrying failed requests.

        Returns immediately when the part file already holds the whole
        range. Otherwise up to ``MAX_ATTEMPTS`` requests are made, each one
        resuming where the previous one stopped. The outcome is reported
        through ``downloaded`` and ``error``; nothing is raised.
        """
        self._sync_with_part_file()
        self.error = None
        attempts = 0
        while (self.downloaded < self.totalLength
               and attempts < self.MAX_ATTEMPTS):
            attempts += 1
            try:
                self._fetch_remaining()
            except Exception as err:
                # Retry boundary: any failure (network, HTTP, disk) is
                # remembered and the request is retried from the current
                # offset.
                self.error = err
            if self.downloaded < self.totalLength:
                time.sleep(self.RETRY_DELAY)
        if self.downloaded >= self.totalLength:
            self.error = None
        elif self.error is None:
            # Every request ended early without raising.
            self.error = IOError("connection closed before the range was complete")
# Split the file into byte ranges so several threads can download it.
def Split(size, blocks):
    """Divide ``size`` bytes into ``blocks`` contiguous, inclusive ranges.

    Every range but the last covers ``size // blocks`` bytes; the last one
    runs to byte ``size - 1`` and therefore absorbs the remainder.

    Args:
        size: total number of bytes.
        blocks: number of ranges to produce; must be at least 1.

    Returns:
        A list of ``blocks`` tuples ``(first_byte, last_byte)``. When
        ``size`` is smaller than ``blocks`` the leading ranges are empty
        (``first_byte > last_byte``); callers should cap ``blocks`` at
        ``size`` to avoid that.

    Raises:
        ValueError: ``blocks`` is less than 1.
    """
    if blocks < 1:
        raise ValueError("blocks must be at least 1, got %r" % (blocks,))
    # Floor division keeps the offsets integral under true division too.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges
# Get the size of the remote file.
def GetHttpFileSize(url):
    """Return the size in bytes that the server reports for ``url``.

    The size is taken from the ``Content-Length`` response header. On
    success the value is also printed to stdout.

    Args:
        url: address to query.

    Returns:
        The content length as an int, or 0 when the URL cannot be opened,
        the server answers with an HTTP error status, or the header is
        missing or not a number.
    """
    try:
        conn = urllib.urlopen(url)
    except Exception:
        # Best-effort probe: any failure to open the URL is reported to
        # the caller as "size unknown" (0).
        return 0
    try:
        # urllib.urlopen does not raise on HTTP error statuses; without
        # this check the length of the error page would be returned.
        status = conn.getcode()
        if status is not None and status >= 400:
            return 0
        # int(None) raises TypeError when the header is absent; using a
        # separate name guarantees that 0, not None, is returned then.
        length = int(conn.info().getheader("Content-Length"))
    except (TypeError, ValueError):
        return 0
    finally:
        conn.close()
    print("Get File Length: %d" % length)
    return length
def hasLive(ts):
    """Tell whether at least one thread in ``ts`` reports itself alive.

    Returns ``False`` for an empty collection.
    """
    return any(worker.isAlive() for worker in ts)
#
def _remove_part_files(paths):
    """Delete every file in ``paths``, skipping those that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def _merge_part_files(paths, target, chunk_size=1024 * 1024):
    """Concatenate the files in ``paths``, in order, into ``target``."""
    with open(target, 'wb') as merged:
        for path in paths:
            with open(path, 'rb') as part:
                # Copy in chunks so a large part is never held in memory
                # as a whole.
                while True:
                    chunk = part.read(chunk_size)
                    if not chunk:
                        break
                    merged.write(chunk)


def MyHttpGet(url, output=None, connections=4):
    """Download ``url`` with several range requests in parallel and join the parts.

    The file is split into ``connections`` byte ranges. Each range is
    fetched by its own ``HttpGetThread`` into ``<output>_<index>``, and the
    part files are concatenated into ``output`` once every range is
    complete. Progress is written to stdout while the workers run.

    arguments:
        url, in GBK encoding
        output, default encoding, no conversion is done; when omitted, the
            text after the last '/' of ``url`` is used
        connections, integer

    raises:
        URLUnreachable: no usable file size could be obtained for ``url``.
        ValueError: ``output`` is empty and ``url`` ends with '/'.
        CanotDownload: at least one part does not hold exactly the bytes of
            its range after the workers stopped. Incomplete parts stay on
            disk so that calling again resumes them.

    Ctrl-C during the download deletes the part files and exits the
    process with status 1.
    """
    length = GetHttpFileSize(url)
    if not length:
        raise URLUnreachable(url)
    startTime = time.time()  # start of this run
    mb = length / (1024.0 * 1024.0)

    filename = output or url.split('/')[-1]
    if not filename:
        raise ValueError(
            "cannot derive an output file name from %r; pass output" % (url,))

    # More workers than bytes would produce empty ranges.
    blocks = max(1, min(connections, length))
    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]
    ts = [HttpGetThread(" 下载线程 " + str(i), url, names[i], ranges[i])
          for i in range(blocks)]
    # Bytes left over from an earlier run. Sampled before any worker
    # starts so they are not counted into this run's average rate.
    startSize = sum(t.downloaded for t in ts)
    for t in ts:
        t.setDaemon(True)
        t.start()

    rate = 0.0  # average download speed of this run, KB/s
    try:
        while True:
            # Sample liveness first: once no worker is alive the counters
            # read below are final, so the last line printed is accurate.
            live = hasLive(ts)
            etime = time.time() - startTime
            done = sum(t.downloaded for t in ts)
            d = done / float(length) * 100
            if etime > 0:
                rate = (done - startSize) / float(etime) / 1024
            else:
                rate = 0.0
            # '\r' moves the cursor back to the start of the line so the
            # progress text is redrawn in place.
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            if not live:
                break
            time.sleep(0.8)
    except KeyboardInterrupt:
        print("")
        print("Exit...")
        _remove_part_files(names)
        sys.exit(1)

    print("")
    etime = time.time() - startTime
    print(u'耗时: %d:%d, 平均速度:%.2fKB/s' % (int(etime) // 60, int(etime) % 60, rate))

    # Merging parts of the wrong size would silently produce a corrupt file.
    unfinished = [names[i] for i, t in enumerate(ts)
                  if t.downloaded != t.totalLength]
    if unfinished:
        # A part larger than its range can never be resumed; drop it so
        # the next run fetches that range again from scratch.
        _remove_part_files([names[i] for i, t in enumerate(ts)
                            if t.downloaded > t.totalLength])
        raise CanotDownload(
            "%d of %d parts are incomplete: %s"
            % (len(unfinished), blocks, ", ".join(unfinished)))

    _merge_part_files(names, filename)
    _remove_part_files(names)
# Command-line entry point: download the URL given with -f, optionally
# naming the result with -o.
if __name__ == "__main__":
    parser = OptionParser(usage="%prog -f URL [-o OUTPUT]")
    parser.add_option("-f", "--file", action="store", type="string",
                      dest="url", help="URL of the file to download")
    parser.add_option("-o", "--output", action="store", type="string",
                      dest="output",
                      help="output file name (default: last part of the URL)")
    # parse_args() reads sys.argv[1:] itself; passing sys.argv would treat
    # the program name as a positional argument.
    (options, args) = parser.parse_args()
    connections = 5
    if not options.url:
        # Prints the usage text and exits with status 2 instead of
        # silently doing nothing.
        parser.error("no URL given; use -f URL")
    url = options.url
    output = options.output or os.path.basename(url)
    if not output:
        parser.error("cannot derive an output name from the URL; use -o OUTPUT")
    try:
        MyHttpGet(url, output, connections)
    except (URLUnreachable, CanotDownload) as err:
        sys.stderr.write("Download failed (%s): %s\n"
                         % (err.__class__.__name__, err))
        sys.exit(1)
修改网友写的下载软件
最新推荐文章于 2020-06-06 10:05:00 发布
本文介绍了一个使用Python实现的多线程HTTP下载器。该下载器通过定义自定义异常类来处理连接错误,并通过分割文件为多个块以便利用多个线程进行并发下载。此外,还提供了进度显示和错误处理功能。

518

被折叠的 条评论
为什么被折叠?



