def generateFiles(minsize):
    """Create a CAS-named test file of at least *minsize* bytes, plus a
    deliberately mis-named ("bad") copy of it.

    The file contains *minsize* NUL bytes followed by 1-256 random bytes
    (so repeated calls produce distinct content/keys).

    Returns a tuple (filekey, filename, filenamebad) where filekey is the
    fencoded hash of the file, filename is /tmp/<filekey>, and filenamebad
    is a copy under a name that does NOT match its content hash.
    """
    # mkstemp creates and opens the file atomically, avoiding the
    # classic mktemp() name-guessing race.
    fd, fname = tempfile.mkstemp()
    f = os.fdopen(fd, 'w')
    try:
        f.write('\0'*minsize)
        # 1..256 random trailing bytes ensure a unique content hash
        f.write(generateRandom(random.randrange(256)+1))
    finally:
        f.close()
    # content-addressable key: fencoded integer form of the file hash
    filekey = fencode(int(hashfile(fname), 16))
    filename = os.path.join("/tmp", filekey)
    os.rename(fname, filename)
    # the "bad" copy has a name that cannot match its hash (non-CAS)
    filenamebad = os.path.join("/tmp/", "bad"+filekey[3:])
    shutil.copy(filename, filenamebad)
    return (filekey, filename, filenamebad)
def _storeFile(self, request, filekey, reqKu, nodeID):
    """Handle the body of a STORE request: persist the uploaded data.

    The upload is written to a temp file in the store directory, then
    dispatched on the filekey suffix:
      - ".tar"/".tar.gz": treated as a client-aggregated tarball; its
        per-member hashes are verified and it is concatenated into (or
        becomes) this requestor's tarball.
      - anything else: treated as a single block; its hash must match
        filekey (CAS check) before it is stored (possibly folded into
        the requestor's tarball if small).

    Returns a status string on success; on a CAS violation sets an HTTP
    CONFLICT response code and returns the error message.
    """
    # [XXX: memory management is not happy here. might want to look at
    # request.registerProducer(). Otherwise, might have to scrap
    # using the STORE(ROOT(RESOURCE)) deal in favor of
    # producer/consumer model for STORE ops
    # (http://itamarst.org/writings/OSCON03/twisted_internet-108.html).
    # Another option might include subclassing web.resource.Resource
    # and making this derive from that... Or might be web.Site that
    # needs to be subclassed... Or maybe web.site.Request -
    # web.site.Request.process()? Request seems doubly-bad: perhaps a
    # copy is made somewhere, because memory mushrooms to 2x big
    # upload, then goes back down to around 1x.
    # [update: This should be fixable in twisted.web2, but I am informed
    # that in the current version, there is no workaround]
    # get the data to a tmp file
    loggerstor.debug("writing store data to tmpfile")
    tmpfile = tempfile.mktemp(dir=self.config.storedir)
    # per-requestor aggregate tarball, keyed by the requestor's public key id
    tarball = os.path.join(self.config.storedir,reqKu.id()+".tar")
    # rename and/or prepend the data appropriately
    tmpTarMode = None
    if filekey[-4:] == ".tar":
        tmpfile = tmpfile+".tar"
        tmpTarMode = 'r'
        targetTar = tarball
    elif filekey[-7:] == ".tar.gz":
        tmpfile = tmpfile+".tar.gz"
        tmpTarMode = 'r:gz'
        targetTar = tarball+".gz"
    loggerstor.debug("tmpfile is %s" % tmpfile)
    # XXX: if the server supports both .tar and tar.gz, this is wrong; we'd
    # need to check *both* for already existing dudes instead of just
    # choosing one
    # rebind 'tarball' to a (path, tarfile-mode) tuple if an aggregate
    # tarball already exists for this requestor, else None
    if os.path.exists(tarball+'.gz'):
        tarball = (tarball+'.gz', 'r:gz')
    elif os.path.exists(tarball):
        tarball = (tarball, 'r')
    else:
        tarball = None
    loggerstor.debug("tarball is %s" % str(tarball))
    data = request.args.get('filename')[0]  # XXX: file in mem! need web2.
    # XXX: bad blocking stuff here
    f = open(tmpfile, 'wb')
    f.write(data)
    f.close()
    # shell out to file(1) purely for debug logging of the upload type
    ftype = os.popen('file %s' % tmpfile)
    loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read()))
    ftype.close()
    if tmpTarMode:
        # client sent a tarball
        loggerstor.debug("about to chksum %s" % tmpfile)
        # verify each member's name is the CAS hash of its content
        # ('.meta' members are metadata, exempt from the check)
        digests = TarfileUtils.verifyHashes(tmpfile, '.meta')
        loggerstor.debug("chksum returned %s" % digests)
        ftype = os.popen('file %s' % tmpfile)
        loggerstor.debug("ftype of %s is %s" % (tmpfile, ftype.read()))
        ftype.close()
        if not digests:
            msg = "Attempted to use non-CAS storage key(s) for" \
                    " STORE tarball"
            loggerstor.debug(msg)
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        # XXX: add digests to a db of already stored files (for quick
        # lookup)
        if tarball:
            # an aggregate tarball already exists: drop any members of
            # the upload that are already present, then concatenate
            tarname, tarnameMode = tarball
            loggerstor.debug("concatenating tarfiles %s and %s" %
                    (tarname, tmpfile))
            f1 = tarfile.open(tarname, tarnameMode)
            f2 = tarfile.open(tmpfile, tmpTarMode)
            f1names = f1.getnames()
            f2names = f2.getnames()
            f1.close()
            f2.close()
            dupes = [f for f in f1names if f in f2names]
            TarfileUtils.delete(tmpfile, dupes)
            ftype = os.popen('file %s' % tarname)
            loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read()))
            ftype.close()
            TarfileUtils.concatenate(tarname, tmpfile)
            ftype = os.popen('file %s' % tarname)
            loggerstor.debug("ftype of %s is %s" % (tarname, ftype.read()))
            ftype.close()
        else:
            # no existing aggregate: the upload becomes the aggregate
            loggerstor.debug("saving %s as tarfile %s" %
                    (tmpfile, targetTar))
            os.rename(tmpfile, targetTar)
    else:
        # client sent regular file
        h = hashfile(tmpfile)
        if request.args.has_key('meta') and request.args.has_key('metakey'):
            metakey = request.args.get('metakey')[0]
            meta = request.args.get('meta')[0]  # XXX: file in mem!
        else:
            metakey = None
            meta = None
        # CAS enforcement: the storage key must equal the content hash
        if fencode(long(h, 16)) != filekey:
            msg = "Attempted to use non-CAS storage key for STORE data "
            msg += "(%s != %s)" % (filekey, fencode(long(h, 16)))
            os.remove(tmpfile)
            request.setResponseCode(http.CONFLICT, msg)
            return msg
        fname = os.path.join(self.config.storedir, filekey)
        if os.path.exists(fname):
            # block already stored: just record this node's metadata
            loggerstor.debug("adding metadata to %s" % fname)
            f = BlockFile.open(fname,'rb+')
            if not f.hasNode(nodeID):
                # NOTE(review): when no meta/metakey args were sent this
                # records {None: None} — confirm that is intended
                f.addNode(int(nodeID,16), {metakey: meta})
            f.close()
            os.remove(tmpfile)
        else:
            if os.path.exists(nodeID+".tar"):
                # XXX: need to do something with metadata!
                print "XXX: need to do something with metadata for tar!"
                # NOTE(review): 'tarname' is not assigned anywhere on this
                # branch (only in the tmpTarMode branch above) — this line
                # raises NameError if reached; presumably it should be the
                # nodeID+".tar" path just tested. TODO confirm and fix.
                tarball = tarfile.open(tarname, 'r')
                if fname in tarball.getnames():
                    loggerstor.debug("%s already stored in tarball" % fname)
                    # if the file is already in the corresponding tarball,
                    # update its timestamp and return success.
                    loggerstor.debug("%s already stored" % filekey)
                    # XXX: update timestamp for filekey in tarball
                    return "Successful STORE"
                else:
                    loggerstor.debug("tarball for %s, but %s not in tarball"
                            % (nodeID,fname))
            if len(data) < 8192 and fname != tarname:  #XXX: magic
                                                       # (blk sz)
                # If the file is small, move it into the appropriate
                # tarball.  Note that this code is unlikely to ever be
                # executed if the client is an official flud client, as
                # they do the tarball aggregation thing already, and all
                # tarballs will be > 8192.  This is, then, really just
                # defensive coding -- clients aren't required to implement
                # that tarball aggregation strategy.  And it is really only
                # useful for filesystems with inefficient small file
                # storage.
                loggerstor.debug("moving small file '%s' into tarball" %
                        fname)
                if not os.path.exists(tarname):
                    tarball = tarfile.open(tarname, 'w')
                else:
                    tarball = tarfile.open(tarname, 'a')
                # XXX: more bad blocking stuff
                tarball.add(tmpfile, os.path.basename(fname))
                if meta:
                    # metadata rides along as a sibling ".meta" member
                    metafilename = "%s.%s.meta" % (os.path.basename(fname),
                            metakey)
                    loggerstor.debug("adding metadata file to tarball %s" %
                            metafilename)
                    metaio = StringIO(meta)
                    tinfo = tarfile.TarInfo(metafilename)
                    tinfo.size = len(meta)
                    tarball.addfile(tinfo, metaio)
                tarball.close()
                os.remove(tmpfile)
            else:
                # store the file
                loggerstor.debug("storing %s" % fname)
                os.rename(tmpfile, fname)
                # convert the plain file into a BlockFile carrying this
                # node's metadata record
                BlockFile.convert(fname, (int(nodeID,16), {metakey: meta}))
    loggerstor.debug("successful STORE for %s" % filekey)
    return "Successful STORE"
def promptUser(self): helpDict = {} command = raw_input("%s> " % time.ctime()) commands = command.split(' ') # XXX: should tokenize on any whitespace commandkey = commands[0][:4] # core client operations helpDict['exit'] = "exit from the client" helpDict['help'] = "display this help message" helpDict['ping'] = "send a GETID() message: 'ping host port'" helpDict['putf'] = "store a file: 'putf canonicalfilepath'" helpDict['getf'] = "retrieve a file: 'getf canonicalfilepath'" helpDict['geti'] = "retrieve a file by CAS key: 'geti fencodedCASkey'" helpDict['fndn'] = "send a FINDNODE() message: 'fndn hexIDstring'" helpDict['list'] = "list stored files (read from local metadata)" helpDict['putm'] = "store master metadata" helpDict['getm'] = "retrieve master metadata" helpDict['cred'] = "send encrypted private credentials: cred"\ " passphrase emailaddress" helpDict['node'] = "list known nodes" helpDict['buck'] = "print k buckets" helpDict['stat'] = "show pending actions" helpDict['stor'] = "store a block to a given node:"\ " 'stor host:port,fname'" helpDict['rtrv'] = "retrieve a block from a given node:"\ " 'rtrv host:port,fname'" helpDict['vrfy'] = "verify a block on a given node:"\ " 'vrfy host:port:offset-length,fname'" helpDict['fndv'] = "retrieve a value from the DHT: 'fndv hexkey'" helpDict['dlet'] = "delete from the stor: '[XXX]'" if commandkey == 'exit' or commandkey == 'quit': self.quit = True elif commandkey == 'help': self.printHelp(helpDict) elif commandkey == 'ping': # ping a host # format: 'ping host port' func = lambda: self.sendPING(commands[1], commands[2]) self.callFactory(func, commands, self.msgs) elif commandkey == 'putf': # store a file # format: 'putf canonicalfilepath' func = lambda: self.sendPUTF(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'getf': # retrieve a file # format: 'getf canonicalfilepath' func = lambda: self.sendGETF(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'geti': # 
retrieve a file by CAS ID # format: 'geti fencoded_CAS_ID' func = lambda: self.sendGETI(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'fndn': # find a node (or the k-closest nodes) # format: 'fndn hexIDstring' func = lambda: self.sendFNDN(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'list': # list stored files self.callFactory(self.sendLIST, commands, self.msgs) elif commandkey == 'putm': # store master metadata self.callFactory(self.sendPUTM, commands, self.msgs) elif commandkey == 'getm': # retrieve master metadata self.callFactory(self.sendGETM, commands, self.msgs) elif commandkey == 'cred': # send encrypted private credentials to an email address # format: 'cred passphrase emailaddress' func = lambda: self.sendCRED( command[len(commands[0])+1:-len(commands[-1])-1], commands[-1]) self.callFactory(func, commands, self.msgs) # the following are diagnostic operations, debug-only utility elif commandkey == 'node': # list known nodes self.callFactory(self.sendDIAGNODE, commands, self.msgs) elif commandkey == 'buck': # show k-buckets self.callFactory(self.sendDIAGBKTS, commands, self.msgs) elif commandkey == 'stat': # show pending actions print self.pending elif commandkey == 'stor': # stor a block to a given node. format: 'stor host:port,fname' storcommands = commands[1].split(',') try: fileid = int(storcommands[1], 16) except: linkfile = fencode(long(hashfile(storcommands[1]),16)) if (os.path.islink(linkfile)): os.remove(linkfile) os.symlink(storcommands[1], linkfile) storcommands[1] = linkfile # XXX: delete this file when the command finishes commands[1] = "%s,%s" % (storcommands[0], storcommands[1]) func = lambda: self.sendDIAGSTOR(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'rtrv': # retrive a block from a given node. 
format: 'rtrv host:port,fname' func = lambda: self.sendDIAGRTRV(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'vrfy': # verify a block on a given node. # format: 'vrfy host:port:offset-length,fname' logger.debug("vrfy(%s)" % commands[1]) func = lambda: self.sendDIAGVRFY(commands[1]) self.callFactory(func, commands, self.msgs) elif commandkey == 'dlet': print "not yet implemented" elif commandkey == 'fndv': # try to retrieve a value from the DHT # format: 'fndv key' func = lambda: self.sendDIAGFNDV(commands[1]) self.callFactory(func, commands, self.msgs) elif command != "": reactor.callFromThread(self.queueError, None, self.msgs, "illegal command '%s'" % command)