Exemplo n.º 1
0
    def Split( self, max_gap_length ):
        """Split each alignment at long gaps and reload the table.

        Every row of the table is read, its alignment is rebuilt and handed
        to alignlib.splitAlignata together with max_gap_length. Each fragment
        that comes back is written as a row of its own (via self.WriteLine)
        to a temporary file. Afterwards the table is dropped, re-created and
        loaded from that file. This is necessary, as structural domains can
        be discontinuous.

        max_gap_length -- gap length threshold passed on to
                          alignlib.splitAlignata.
        """

        statement = """
        SELECT nid, start, end, rep_ali,
        domain_id, domain_from, domain_to, domain_ali,
        family
        %s
        FROM %s""" % (self.GetAdditionalInfo(), self.name )

        tempfile = os.tempnam(Pairsdb.PATH_LOAD, "scmp")

        outfile = open( tempfile, "w" )

        # try/finally so that the file handle is released even if the query
        # or one of the alignlib calls raises.
        try:
            domains = self.Execute(statement).fetchall()

            for domain in domains:
                # the first nine columns are fixed; everything after them is
                # the additional info requested via GetAdditionalInfo()
                (nid, start, end, rep_ali,
                 domain_id, domain_from, domain_to, domain_ali,
                 family) = domain[:9]

                map_rep2domains = alignlib.makeAlignataVector()

                alignlib.fillAlignataCompressed( map_rep2domains, start, rep_ali, domain_from, domain_ali)

                val = alignlib.splitAlignata( map_rep2domains, max_gap_length)

                fragments = [ alignlib.AlignataPtr(x) for x in val ]

                ## now write each fragment to the output
                for map_rep2domain in fragments:
                    ## so that the object gets deleted, once it goes out of scope
                    map_rep2domain.thisown = 1

                    # WriteLine extracts the coordinates and the compressed
                    # alignment strings from the fragment itself.
                    self.WriteLine( outfile,
                                    nid,
                                    map_rep2domain,
                                    domain_id,
                                    family,
                                    domain[9:])
        finally:
            outfile.close()

        self.Drop()
        self.Create()
        self.Load( tempfile )
Exemplo n.º 2
0
    def Split( self, max_gap_length ):
        """Split each alignment at long gaps and reload the table.

        Every row of the table is read, its alignment is rebuilt and handed
        to alignlib.splitAlignata together with max_gap_length. Each fragment
        that comes back is written as a row of its own (via self.WriteLine)
        to a temporary file. Afterwards the table is dropped, re-created and
        loaded from that file. This is necessary, as structural domains can
        be discontinuous.

        max_gap_length -- gap length threshold passed on to
                          alignlib.splitAlignata.
        """

        statement = """
        SELECT nid, start, end, rep_ali,
        domain_id, domain_from, domain_to, domain_ali,
        family
        %s
        FROM %s""" % (self.GetAdditionalInfo(), self.name )

        tempfile = os.tempnam(Pairsdb.PATH_LOAD, "scmp")

        outfile = open( tempfile, "w" )

        # try/finally so that the file handle is released even if the query
        # or one of the alignlib calls raises.
        try:
            domains = self.Execute(statement).fetchall()

            for domain in domains:
                # the first nine columns are fixed; everything after them is
                # the additional info requested via GetAdditionalInfo()
                (nid, start, end, rep_ali,
                 domain_id, domain_from, domain_to, domain_ali,
                 family) = domain[:9]

                map_rep2domains = alignlib.makeAlignataVector()

                alignlib.fillAlignataCompressed( map_rep2domains, start, rep_ali, domain_from, domain_ali)

                val = alignlib.splitAlignata( map_rep2domains, max_gap_length)

                fragments = [ alignlib.AlignataPtr(x) for x in val ]

                ## now write each fragment to the output
                for map_rep2domain in fragments:
                    ## so that the object gets deleted, once it goes out of scope
                    map_rep2domain.thisown = 1

                    # WriteLine extracts the coordinates and the compressed
                    # alignment strings from the fragment itself.
                    self.WriteLine( outfile,
                                    nid,
                                    map_rep2domain,
                                    domain_id,
                                    family,
                                    domain[9:])
        finally:
            outfile.close()

        self.Drop()
        self.Create()
        self.Load( tempfile )
Exemplo n.º 3
0
    def GetLinks(self, query_nid, query_from, query_to, query_ali, sbjct_nid,
                 sbjct_from, sbjct_to, sbjct_ali):
        """returns all possible links between link split into domains.
        """

        if self.mLogLevel >= 2:
            print "# processing", query_nid, sbjct_nid, query_from, query_to, sbjct_from, sbjct_to
            sys.stdout.flush()

        map_query2sbjct = alignlib.makeAlignataVector()

        alignlib.fillAlignataCompressed(map_query2sbjct, query_from, query_ali,
                                        sbjct_from, sbjct_ali)

        # iterate over query
        for query_domain_from, query_domain_to, query_family in self.mDomains[
                query_nid]:

            # check if overlap
            overlap = min(query_to, query_domain_to) - max(
                query_from, query_domain_from) + 1
            if overlap <= self.mMinOverlapResidues: continue

            # check for overlap with domains in sbjct
            for sbjct_domain_from, sbjct_domain_to, sbjct_family in self.mDomains[
                    sbjct_nid]:

                overlap = min(sbjct_to, sbjct_domain_to) - max(
                    sbjct_from, sbjct_domain_from) + 1
                if overlap < self.mMinOverlapResidues: continue

                map_new_query2sbjct = alignlib.makeAlignataVector()
                alignlib.copyAlignata(map_new_query2sbjct, map_query2sbjct,
                                      query_domain_from, query_domain_to,
                                      sbjct_domain_from, sbjct_domain_to)

                if map_new_query2sbjct.getLength() > 0:

                    row_ali, col_ali = alignlib.writeAlignataCompressed(
                        map_new_query2sbjct)

                    print string.join(
                        ("%s_%s_%s" %
                         (query_nid, query_domain_from, query_domain_to),
                         "%s_%s_%s" %
                         (sbjct_nid, sbjct_domain_from, sbjct_domain_to), "0",
                         str(map_new_query2sbjct.getRowFrom()),
                         str(map_new_query2sbjct.getRowTo()), row_ali,
                         str(map_new_query2sbjct.getColFrom()),
                         str(map_new_query2sbjct.getColTo()), col_ali), "\t")
Exemplo n.º 4
0
    def CheckResult(self, result, info1=None, info2=None):
        """Convert an alignment result into a tuple of output fields.

        For a non-empty result (getLength() > 0) the tuple holds, as strings:
        score, length, number of gaps, percent similarity, row from, row to,
        compressed row alignment, col from, col to, compressed col alignment.
        For an empty result a tuple with the same number of fields, each
        "0", is returned so that every output row has the same width.

        info1 and info2 are not used by this implementation.
        """

        if result.getLength() > 0:
            row_ali, col_ali = alignlib.writeAlignataCompressed(result)
            # tuple(), so that both branches return the same type
            return tuple(map(
                str,
                (result.getScore(), result.getLength(), result.getNumGaps(),
                 alignlib.calculatePercentSimilarity(result),
                 result.getRowFrom(), result.getRowTo(), row_ali,
                 result.getColFrom(), result.getColTo(), col_ali)))

        # placeholder row: one "0" for each of the ten fields above
        return ("0", ) * 10
Exemplo n.º 5
0
    def CheckResult( self,
                     result,
                     info1 = None,
                     info2 = None):
        """Convert an alignment result into a tuple of output fields.

        For a non-empty result (getLength() > 0) the tuple holds, as strings:
        score, length, number of gaps, percent similarity, row from, row to,
        compressed row alignment, col from, col to, compressed col alignment.
        For an empty result a tuple with the same number of fields, each
        "0", is returned so that every output row has the same width.

        info1 and info2 are not used by this implementation.
        """

        if result.getLength() > 0:
            row_ali, col_ali = alignlib.writeAlignataCompressed( result )
            # tuple(), so that both branches return the same type
            return tuple( map(str, (result.getScore(),
                                    result.getLength(),
                                    result.getNumGaps(),
                                    alignlib.calculatePercentSimilarity( result ),
                                    result.getRowFrom(), result.getRowTo(), row_ali,
                                    result.getColFrom(), result.getColTo(), col_ali ) ) )

        # placeholder row: one "0" for each of the ten fields above
        return ("0",) * 10
    def GetLinks( self, query_nid, query_from, query_to, query_ali, sbjct_nid, sbjct_from, sbjct_to, sbjct_ali):
        """returns all possible links between link split into domains.
        """

        if self.mLogLevel >= 2:
            print "# processing", query_nid, sbjct_nid, query_from, query_to, sbjct_from, sbjct_to
            sys.stdout.flush()
            
        map_query2sbjct = alignlib.makeAlignataVector()

        alignlib.fillAlignataCompressed( map_query2sbjct, query_from, query_ali, sbjct_from, sbjct_ali )

        # iterate over query
        for query_domain_from, query_domain_to, query_family in self.mDomains[query_nid]:

            # check if overlap
            overlap = min(query_to, query_domain_to)-max(query_from, query_domain_from) + 1
            if overlap <= self.mMinOverlapResidues: continue

            # check for overlap with domains in sbjct
            for sbjct_domain_from, sbjct_domain_to, sbjct_family in self.mDomains[sbjct_nid]:
                
                overlap = min(sbjct_to, sbjct_domain_to)-max(sbjct_from, sbjct_domain_from) + 1
                if overlap < self.mMinOverlapResidues: continue

                map_new_query2sbjct = alignlib.makeAlignataVector()
                alignlib.copyAlignata( map_new_query2sbjct, map_query2sbjct,
                                       query_domain_from, query_domain_to,
                                       sbjct_domain_from, sbjct_domain_to)

                if map_new_query2sbjct.getLength() > 0:

                    row_ali, col_ali = alignlib.writeAlignataCompressed(  map_new_query2sbjct )
                    
                    print string.join( ("%s_%s_%s" % (query_nid, query_domain_from, query_domain_to),
                                        "%s_%s_%s" % (sbjct_nid, sbjct_domain_from, sbjct_domain_to),
                                        "0",
                                        str(map_new_query2sbjct.getRowFrom()),
                                        str(map_new_query2sbjct.getRowTo()),
                                        row_ali,
                                        str(map_new_query2sbjct.getColFrom()),
                                        str(map_new_query2sbjct.getColTo()),
                                        col_ali), "\t")
Exemplo n.º 7
0
    def Check(self):

        while 1:
            line = sys.stdin.readline()
            if not line: break

            try:
                (query_token, sbjct_token) = string.split(line[:-1], "\t")[:2]

                query_nid, query_from, query_to = map(
                    string.atoi, string.split(query_token, "_"))
                sbjct_nid, sbjct_from, sbjct_to = map(
                    string.atoi, string.split(sbjct_token, "_"))
            except ValueError:
                continue

            if self.mLogLevel >= 4:
                print "# --> checking link between %i (%i-%i) and %i (%i-%i)" % (
                    query_nid, query_from, query_to, sbjct_nid, sbjct_from,
                    sbjct_to)
                sys.stdout.flush()

            passed, alignment = self.CheckLink(query_nid, query_from, query_to,
                                               sbjct_nid, sbjct_from, sbjct_to)

            if passed:
                print "+\t",
            else:
                print "-\t",

            if alignment.getLength() > 0:
                ali_row, ali_col = alignlib.writeAlignataCompressed(alignment)
                print line[:-1] + "\t" + string.join(
                    map(str,
                        (alignment.getRowFrom(), alignment.getRowTo(), ali_row,
                         alignment.getColFrom(), alignment.getColTo(), ali_col,
                         alignment.getScore(), alignment.getLength(),
                         alignment.getNumGaps())), "\t")
            else:
                print line[:-1] + "\t" + string.join(
                    map(str, (0, 0, "", 0, 0, "", 0, 0, 0)), "\t")

            sys.stdout.flush()
Exemplo n.º 8
0
    def WriteLine(self, outfile, nid, map_rep2domain, domain_id, family, additional_info):
        """Write one alignment as a tab-separated row for loading into the table.

        The row holds nid, the row range and compressed row alignment of
        map_rep2domain, domain_id, the column range and compressed column
        alignment, family, and finally every item of additional_info.
        All values are converted with str(); the row ends with a newline.
        """

        row_from = map_rep2domain.getRowFrom()
        row_to = map_rep2domain.getRowTo()
        col_from = map_rep2domain.getColFrom()
        col_to = map_rep2domain.getColTo()

        row_ali, col_ali = alignlib.writeAlignataCompressed(map_rep2domain)

        columns = [nid, row_from, row_to, row_ali,
                   domain_id, col_from, col_to, col_ali,
                   family]

        cells = [str(value) for value in columns]
        cells = cells + [str(value) for value in additional_info]

        outfile.write("\t".join(cells) + "\n")
Exemplo n.º 9
0
    def WriteLine( self, outfile, nid, map_rep2domain, domain_id, family, additional_info):
        """Write one alignment as a tab-separated row for loading into the table.

        The row holds nid, the row range and compressed row alignment of
        map_rep2domain, domain_id, the column range and compressed column
        alignment, family, and finally every item of additional_info.
        All values are converted with str(); the row ends with a newline.
        """

        # coordinates first, then the compressed alignment strings
        coordinates = ( map_rep2domain.getRowFrom(),
                        map_rep2domain.getRowTo(),
                        map_rep2domain.getColFrom(),
                        map_rep2domain.getColTo() )
        start, end, domain_from, domain_to = coordinates

        compressed = alignlib.writeAlignataCompressed( map_rep2domain )
        (rep_ali, domain_ali) = compressed

        fixed_part = [ str(v) for v in ( nid, start, end, rep_ali,
                                         domain_id, domain_from, domain_to, domain_ali,
                                         family ) ]
        extra_part = [ str(v) for v in additional_info ]

        outfile.write( "%s\n" % "\t".join( fixed_part + extra_part ) )
Exemplo n.º 10
0
        
        if cur_record is None: break
        sequences.append( (cur_record.title, alignlib.makeSequence(re.sub( " ", "", cur_record.sequence)) ) )
    
    if options.filename_sequences:
        infile.close()

    alignator = alignlib.makeAlignatorFullDP( options.gop, options.gep )
    map_a2b = alignlib.makeAlignataVector()
    nsequences = len(sequences)
    
    for x in range(0,nsequences-1):
        for y in range(x+1, nsequences):
            alignator.Align( sequences[x][1], sequences[y][1], map_a2b)

            row_ali, col_ali = alignlib.writeAlignataCompressed( map_a2b )
            
            options.stdout.write( "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (\
                sequences[x][0], sequences[y][0],
                map_a2b.getScore(),
                map_a2b.getRowFrom(),
                map_a2b.getRowTo(),
                row_ali,
                map_a2b.getColFrom(),
                map_a2b.getColTo(),
                col_ali,
                map_a2b.getScore(),
                100 * alignlib.calculatePercentIdentity( map_a2b, sequences[x][1], sequences[y][1]),
                sequences[x][1].getLength(),
                sequences[y][1].getLength() ))
            
Exemplo n.º 11
0
    def FillAlignments( self ):
        """the main all-vs-all alignments engine.
        """
        sources = self.mTableSources.GetSources()

        nsources = len(sources)
        if self.mLogLevel >= 1:
            print "--> calculating %i alignments for %i sequences" % ((nsources * (nsources -1)) / 2, nsources)

        # calculate the alignanda objects
        if self.mLogLevel >= 1:
            print "--> retrieving alignanda objects"
            sys.stdout.flush()
            
        alignanda = self.CreateAlignandumObjects( sources )

        map_query2sbjct = alignlib.makeAlignataVector()
        outfile = open (self.mTempFilename, "w" )
        
        
        # do all vs all alignments
        for query in range(0, nsources - 1):
            query_id, query_alignandum = alignanda[query]

            start_time = time.time()
            
            if self.mLogLevel >= 2:
                print "processing id %i at %s" % (query_id, time.asctime(time.localtime(start_time)))
                sys.stdout.flush()
            
            for sbjct in range( query + 1, nsources):
                sbjct_id, sbjct_alignandum = alignanda[sbjct]                

                self.mAlignator.Align( query_alignandum, sbjct_alignandum, map_query2sbjct )

                (query_ali, sbjct_ali) = alignlib.writeAlignataCompressed( map_query2sbjct )

                outfile.write( string.join( map( str, (
                    query_id,
                    map_query2sbjct.getRowFrom(),
                    map_query2sbjct.getRowTo(),
                    query_ali,
                    sbjct_id,
                    map_query2sbjct.getColFrom(),
                    map_query2sbjct.getColTo(),
                    sbjct_ali,
                    map_query2sbjct.getScore(),
                    map_query2sbjct.getNumGaps(),
                    map_query2sbjct.getLength(),
                    0)), "@") + "\n" )
                    
                    
            stop_time = time.time()
            
            if self.mLogLevel >= 2:
                print "--> alignments: %5i, time: %7.2fs" %\
                      ( nsources - query - 1,
                        stop_time - start_time)

        outfile.close()

        # load data
        self.mTableAlignments.Drop()
        self.mTableAlignments.Create()
        self.mTableAlignments.Load( self.mTempFilename)