def merge(self):
    """Merge the partial profile libraries written by parallel runs.

    Part names are taken from every path matching
    ``self.mFilenameProfile + "*"`` except the target name itself, with
    the last four characters (the file suffix) stripped and duplicates
    removed. Every ``(nid, profile)`` pair of every part is copied into a
    new library opened for writing at ``self.mFilenameProfile``. Nids
    present in ``self.mFasta`` but absent from all parts are reported and
    passed to ``self.applyMethod`` with an empty ``NeighboursRecord``.
    Finally both files of every part are deleted.

    Returns:
        ``None`` (without doing any work) if ``self.isComplete()`` is
        true; otherwise ``True`` if merging was a success, i.e. no nid
        was missing, duplicated or unknown, and ``False`` if not.
    """
    if self.isComplete():
        return

    candidates = glob.glob("%s*" % self.mFilenameProfile)
    # Remove suffixes; the set collapses the several files of one part
    # into a single part name.
    infiles = sorted(
        {x[:-4] for x in candidates if x != self.mFilenameProfile})

    found = set()
    ninput, noutput, nfound, nunknown, nduplicate = 0, 0, 0, 0, 0
    tokens = set(self.mFasta.keys())

    self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
        self.mFilenameProfile, "w")

    for filename in infiles:
        infile = ProfileLibrary.ProfileLibrary(filename, "r")
        for nid, profile in infile.iteritems_sorted():
            ninput += 1
            if nid in found:
                # Fixed: this used to increment the undefined name
                # ``nduplicates``, which raised UnboundLocalError on the
                # first duplicate instead of counting it.
                nduplicate += 1
                self.warn("duplicate nid: %i in file %s" % (nid, filename))
            if nid not in tokens:
                nunknown += 1
                self.warn("unknown nid: %i in file %s" % (nid, filename))
            # Duplicate and unknown entries are still written; they are
            # only reflected in the counters and the return value.
            found.add(nid)
            nfound += 1
            self.mProfileLibrary.add(nid, profile)
            noutput += 1

    missing = tokens.difference(found)
    if missing:
        self.warn("the following nids were missing: %s" % str(missing))
        self.info("adding %i missing nids" % len(missing))
        for nid in missing:
            self.applyMethod(AddaIO.NeighboursRecord(nid, []))

    self.info(
        "merging: parts=%i, ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i"
        % (len(infiles), ninput, noutput, nfound, len(missing),
           nduplicate, nunknown))

    self.info("deleting %i parts" % len(infiles))
    for part in infiles:
        fn, fi = ProfileLibrary.getFileNames(part)
        os.remove(fn)
        os.remove(fi)

    return not missing and nduplicate == 0 and nunknown == 0
def merge(self):
    """Merge the partial profile libraries written by parallel runs.

    Part names are taken from every path matching
    ``self.mFilenameProfile + "*"`` except the target name itself, with
    the last four characters (the file suffix) stripped and duplicates
    removed. Every ``(nid, profile)`` pair of every part is copied into a
    new library opened for writing at ``self.mFilenameProfile``. Nids
    present in ``self.mFasta`` but absent from all parts are reported and
    passed to ``self.applyMethod`` with an empty ``NeighboursRecord``.
    Finally both files of every part are deleted.

    Returns:
        ``None`` (without doing any work) if ``self.isComplete()`` is
        true; otherwise ``True`` if merging was a success, i.e. no nid
        was missing, duplicated or unknown, and ``False`` if not.
    """
    if self.isComplete():
        return

    matches = glob.glob("%s*" % self.mFilenameProfile)
    # Remove suffixes; the set collapses the several files of one part
    # into a single part name.
    infiles = sorted(
        {x[:-4] for x in matches if x != self.mFilenameProfile})

    found = set()
    ninput, noutput, nfound, nunknown, nduplicate = 0, 0, 0, 0, 0
    tokens = set(self.mFasta.keys())

    self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
        self.mFilenameProfile, "w")

    for filename in infiles:
        infile = ProfileLibrary.ProfileLibrary(filename, "r")
        for nid, profile in infile.iteritems_sorted():
            ninput += 1
            if nid in found:
                # Fixed: this used to increment the undefined name
                # ``nduplicates``, which raised UnboundLocalError on the
                # first duplicate instead of counting it.
                nduplicate += 1
                self.warn("duplicate nid: %i in file %s" % (nid, filename))
            if nid not in tokens:
                nunknown += 1
                self.warn("unknown nid: %i in file %s" % (nid, filename))
            # Duplicate and unknown entries are still written; they are
            # only reflected in the counters and the return value.
            found.add(nid)
            nfound += 1
            self.mProfileLibrary.add(nid, profile)
            noutput += 1

    missing = tokens.difference(found)
    if missing:
        self.warn("the following nids were missing: %s" % str(missing))
        self.info("adding %i missing nids" % len(missing))
        for nid in missing:
            self.applyMethod(AddaIO.NeighboursRecord(nid, []))

    self.info(
        "merging: parts=%i, ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i"
        % (len(infiles), ninput, noutput, nfound, len(missing),
           nduplicate, nunknown))

    self.info("deleting %i parts" % len(infiles))
    for part in infiles:
        fn, fi = ProfileLibrary.getFileNames(part)
        os.remove(fn)
        os.remove(fi)

    return not missing and nduplicate == 0 and nunknown == 0
def isComplete(self):
    """Report whether the profile library of this slice is complete.

    The library name is ``self.mFilenameProfile`` followed by
    ``self.getSlice()``. ``ProfileLibrary.getFileNames`` maps it to a
    pair of file names, and the verdict of ``SegmentedFile.isComplete``
    on the second one is returned unchanged.
    """
    slice_name = self.mFilenameProfile + self.getSlice()
    # Only the second name is checked (presumably the index file, given
    # the original local name ``fi`` -- confirm against ProfileLibrary).
    _, second_name = ProfileLibrary.getFileNames(slice_name)
    return SegmentedFile.isComplete(second_name)
def isComplete(self):
    """Tell whether this slice's profile library has been fully written.

    Returns whatever ``SegmentedFile.isComplete`` reports for the second
    file name that ``ProfileLibrary.getFileNames`` yields for
    ``self.mFilenameProfile + self.getSlice()``.
    """
    file_names = ProfileLibrary.getFileNames(
        self.mFilenameProfile + self.getSlice())
    # Unpack as a pair, as the original did; the first name is unused.
    _unused_name, checked_name = file_names
    return SegmentedFile.isComplete(checked_name)