urllib.urlretrieve(url, filename, reporthook=None,data=None)
参数说明:
url:外部或者本地url
filename:指定了保存到本地的路径(如果未指定该参数,urllib会生成一个临时文件来保存数据);
reporthook:是一个回调函数,当连接上服务器、以及相应的数据块传输完毕的时候会触发该回调。我们可以利用这个回调函数来显示当前的下载进度。
data:指post到服务器的数据。该方法返回一个包含两个元素的元组(filename, headers),filename表示保存到本地的路径,header表示服务器的响应头。
下面通过例子来演示一下这个方法的使用,这个例子将新浪首页的html抓取到本地,保存在D:/sina.html文件中,同时显示下载的进度。
import urllib
def callbackfunc(blocknum, blocksize, totalsize):
'''回调函数
@blocknum: 已经下载的数据块
@blocksize: 数据块的大小
@totalsize: 远程文件的大小
'''
percent = 100.0 * blocknum * blocksize / totalsize
if percent > 100:
percent = 100
print "%.2f%%"% percent
url = 'http://www.sina.com.cn'
local = 'd:\\sina.html'
urllib.urlretrieve(url, local, callbackfunc)
liburllibrequest.py
修改
133行添加'zjsurlretrieve'
'urlretrieve','zjsurlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
229行 添加自定义函数
zjs_url_tempfiles = []
def zjsurlretrieve(url, filename=None, reporthook=None, data=None):
"""
Retrieve a URL into a temporary location on disk.
Requires a URL argument. If a filename is passed, it is used as
the temporary file location. The reporthook argument should be
a callable that accepts a block number, a read size, and the
total file size of the URL target. The data argument should be
valid URL encoded data.
If a filename is passed and the URL points to a local resource,
the result is a copy from local file to new file.
Returns a tuple containing the path to the newly created
data file as well as the resulting HTTPMessage object.
"""
url_type, path = splittype(url)
with contextlib.closing(urlopen(url, data)) as fp:
headers = fp.info()
# Just return the local path and the "headers" for file://
# URLs. No sense in performing a copy unless requested.
if url_type == "file" and not filename:
return os.path.normpath(path), headers
# Handle temporary file setup.
if filename:
tfp = open(filename, 'wb')
else:
tfp = tempfile.NamedTemporaryFile(delete=False)
filename = tfp.name
_url_tempfiles.append(filename)
with tfp:
result = filename, headers
bs = 1024*8
size = -1
read = 0
blocknum = 0
if "content-length" in headers:
size = int(headers["Content-Length"])
if reporthook:
reporthook(blocknum, bs, size,url,filename)
while True:
block = fp.read(bs)
if not block:
break
read += len(block)
tfp.write(block)
blocknum += 1
if reporthook:
reporthook(blocknum, bs, size,url,filename)
if size >= 0 and read < size:
raise ContentTooShortError(
"retrieval incomplete: got only %i out of %i bytes"
% (read, size), result)
return result
import urllib
def callbackfunc(blocknum, blocksize, totalsize,url,filename):
'''回调函数
@blocknum: 已经下载的数据块
@blocksize: 数据块的大小
@totalsize: 远程文件的大小
'''
percent = 100.0 * blocknum * blocksize / totalsize
if percent > 100:
percent = 100
print "%.2f%%"% percent + filename
url = 'http://www.sina.com.cn'
local = 'd:\\sina.html'
urllib.zjsurlretrieve(url, local, callbackfunc)