+-

我设法让我的第一个 python脚本工作,从URL下载.ZIP文件列表,然后继续提取ZIP文件并将它们写入磁盘.
我现在无法实现下一步.
我的主要目标是下载并解压缩zip文件,并通过TCP流传递内容(CSV数据).如果我能逃脱它,我宁愿不将任何zip或解压缩的文件写入磁盘.
这是我当前的脚本,但不幸的是必须将文件写入磁盘.
import urllib, urllister
import zipfile
import urllib2
import os
import time
import pickle
# check for extraction directories existence
if not os.path.isdir('downloaded'):
os.makedirs('downloaded')
if not os.path.isdir('extracted'):
os.makedirs('extracted')
# open logfile for downloaded data and save to local variable
if os.path.isfile('downloaded.pickle'):
downloadedLog = pickle.load(open('downloaded.pickle'))
else:
downloadedLog = {'key':'value'}
# remove entries older than 5 days (to maintain speed)
# path of zip files
zipFileURL = "http://www.thewebserver.com/that/contains/a/directory/of/zip/files"
# retrieve list of URLs from the webservers
usock = urllib.urlopen(zipFileURL)
parser = urllister.URLLister()
parser.feed(usock.read())
usock.close()
parser.close()
# only parse urls
for url in parser.urls:
if "PUBLIC_P5MIN" in url:
# download the file
downloadURL = zipFileURL + url
outputFilename = "downloaded/" + url
# check if file already exists on disk
if url in downloadedLog or os.path.isfile(outputFilename):
print "Skipping " + downloadURL
continue
print "Downloading ",downloadURL
response = urllib2.urlopen(downloadURL)
zippedData = response.read()
# save data to disk
print "Saving to ",outputFilename
output = open(outputFilename,'wb')
output.write(zippedData)
output.close()
# extract the data
zfobj = zipfile.ZipFile(outputFilename)
for name in zfobj.namelist():
uncompressed = zfobj.read(name)
# save uncompressed data to disk
outputFilename = "extracted/" + name
print "Saving extracted file to ",outputFilename
output = open(outputFilename,'wb')
output.write(uncompressed)
output.close()
# send data via tcp stream
# file successfully downloaded and extracted store into local log and filesystem log
downloadedLog[url] = time.time();
pickle.dump(downloadedLog, open('downloaded.pickle', "wb" ))
最佳答案
我的建议是使用
StringIO对象.它们模拟文件,但驻留在内存中.所以你可以这样做:
# get_zip_data() gets a zip archive containing 'foo.txt', reading 'hey, foo'
from StringIO import StringIO
zipdata = StringIO()
zipdata.write(get_zip_data())
myzipfile = zipfile.ZipFile(zipdata)
foofile = myzipfile.open('foo.txt')
print foofile.read()
# output: "hey, foo"
或者更简单(向Vishal道歉):
myzipfile = zipfile.ZipFile(StringIO(get_zip_data()))
for name in myzipfile.namelist():
[ ... ]
在Python 3中使用BytesIO而不是StringIO.
点击查看更多相关文章
转载注明原文:python – 无需写入磁盘即可下载和解压缩.zip文件 - 乐贴网