Exemplo n.º 1
0
    def merge(self):
        """Merge the partial profile libraries written by parallel runs.

        Every path matching ``self.mFilenameProfile + "*"`` (other than the
        target itself) is taken to belong to a part.  All profiles found in
        the parts are copied into a freshly opened library at
        ``self.mFilenameProfile``; nids listed in ``self.mFasta`` but absent
        from every part are handed to ``self.applyMethod`` with an empty
        neighbour list.  Finally the part files are deleted.

        Returns None without doing anything if ``self.isComplete()`` is
        already true.  Otherwise returns True if no nid was missing,
        duplicated or unknown, and False if any such problem was seen.
        """
        if self.isComplete():
            return

        # Strip the four-character suffix and de-duplicate so that each part
        # is listed once, whatever number of files it consists of.
        # NOTE(review): the target is filtered out *before* the suffix is
        # stripped, so the target's own suffixed files would be picked up as
        # a part if they already exist - confirm this cannot happen.
        infiles = sorted(set(
            path[:-4]
            for path in glob.glob("%s*" % self.mFilenameProfile)
            if path != self.mFilenameProfile))

        found = set()
        ninput, noutput, nfound, nunknown, nduplicate = 0, 0, 0, 0, 0
        tokens = set(self.mFasta.keys())

        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile, "w")

        for filename in infiles:
            infile = ProfileLibrary.ProfileLibrary(filename, "r")

            for nid, profile in infile.iteritems_sorted():
                ninput += 1

                # Duplicates and unknown nids are only reported; the profile
                # is still copied to the output below.
                if nid in found:
                    # Previously incremented the undefined name
                    # ``nduplicates``, which raised UnboundLocalError on the
                    # first duplicate.
                    nduplicate += 1
                    self.warn("duplicate nid: %i in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %i in file %s" % (nid, filename))
                found.add(nid)
                nfound += 1
                self.mProfileLibrary.add(nid, profile)
                noutput += 1

        missing = tokens.difference(found)
        if missing:
            self.warn("the following nids were missing: %s" % str(missing))

        self.info("adding %i missing nids" % len(missing))

        for nid in missing:
            self.applyMethod(AddaIO.NeighboursRecord(nid, []))

        self.info( "merging: parts=%i, ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %\
                       (len(infiles), ninput, noutput, nfound, len(missing), nduplicate, nunknown ) )

        self.info("deleting %i parts" % len(infiles))
        for part in infiles:
            # getFileNames maps a part prefix to its two on-disk files.
            fn, fi = ProfileLibrary.getFileNames(part)
            os.remove(fn)
            os.remove(fi)

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0
Exemplo n.º 2
0
    def merge(self):
        """Combine the per-run profile libraries into a single library.

        Parts are discovered by globbing ``self.mFilenameProfile + "*"``.
        Each profile read from a part is written to a new library opened at
        ``self.mFilenameProfile``.  Nids that are keys of ``self.mFasta`` but
        occur in no part are passed to ``self.applyMethod`` as records with
        an empty neighbour list.  The part files are removed at the end.

        Returns None (and does nothing) when ``self.isComplete()`` is true.
        Otherwise returns True when there were no missing, duplicate or
        unknown nids, and False when at least one was encountered.
        """
        if self.isComplete():
            return

        # remove the four-character suffixes; the set collapses the several
        # files of one part into a single prefix
        candidates = glob.glob( "%s*" % self.mFilenameProfile )
        infiles = sorted( set( x[:-4] for x in candidates
                               if x != self.mFilenameProfile ) )

        found = set()
        ninput, noutput, nfound, nunknown, nduplicate = 0, 0, 0, 0, 0
        tokens = set(self.mFasta.keys())

        self.mProfileLibrary = ProfileLibrary.ProfileLibrary( self.mFilenameProfile,
                                                              "w" )

        for filename in infiles:
            infile = ProfileLibrary.ProfileLibrary( filename, "r" )

            for nid, profile in infile.iteritems_sorted():
                ninput += 1

                if nid in found:
                    # The counter is ``nduplicate``; the former spelling
                    # ``nduplicates`` was never initialised and made the
                    # first duplicate raise UnboundLocalError.
                    nduplicate += 1
                    self.warn("duplicate nid: %i in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %i in file %s" % (nid, filename))

                # Problematic nids are reported above but still merged.
                found.add( nid )
                nfound += 1
                self.mProfileLibrary.add( nid, profile )
                noutput += 1

        missing = tokens.difference( found )
        if missing:
            self.warn( "the following nids were missing: %s" % str(missing) )

        self.info( "adding %i missing nids" % len(missing))

        for nid in missing:
            self.applyMethod( AddaIO.NeighboursRecord( nid, [] ) )

        self.info( "merging: parts=%i, ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %\
                       (len(infiles), ninput, noutput, nfound, len(missing), nduplicate, nunknown ) )

        self.info( "deleting %i parts" % len(infiles) )
        for part in infiles:
            # both files reported for the part are deleted
            fn, fi = ProfileLibrary.getFileNames( part )
            os.remove( fn )
            os.remove( fi )

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0
Exemplo n.º 3
0
    def isComplete(self):
        """Report whether the library for this object's slice is complete.

        The library name is ``self.mFilenameProfile`` followed by
        ``self.getSlice()``.  The result is whatever
        ``SegmentedFile.isComplete`` returns for the second of the two
        file names that ``ProfileLibrary.getFileNames`` yields for it.
        """
        sliced_name = self.mFilenameProfile + self.getSlice()
        # Only the second name of the pair is inspected
        # (presumably the index file - confirm against ProfileLibrary).
        _, second_name = ProfileLibrary.getFileNames(sliced_name)
        return SegmentedFile.isComplete(second_name)
Exemplo n.º 4
0
    def isComplete( self ):
        """Tell whether the output belonging to the current slice is complete.

        Resolves the file names for ``self.mFilenameProfile`` plus
        ``self.getSlice()`` through ``ProfileLibrary.getFileNames`` and
        returns the verdict of ``SegmentedFile.isComplete`` on the second one.
        """
        library_name = self.mFilenameProfile + self.getSlice()
        names = ProfileLibrary.getFileNames( library_name )
        # unpack as a pair; the first name is not needed here
        _first, checked = names
        return SegmentedFile.isComplete( checked )