Пример #1
0
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    Both inputs are relabelled with short generated IDs, written out in FASTA
    format through the controller's _tempfile_as_multiline_string helper and
    passed to Clustalw as -profile1 / -profile2 with the -profile option
    switched on.  The original sequence IDs are restored on the result.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.

    moltype: passed as MolType to every Alignment built here.

    params: dict of parameters to pass in to the Clustal app controller.

    The temporary profile files and the application result are cleaned up
    even when running the application or parsing its output raises.
    """
    #Build the first alignment and relabel it with generated IDs; the second
    #value returned by getIntMap() is used below to look up the original ID
    #for each generated ID.
    aln1 = Alignment(aln1, MolType=moltype)
    aln1_int_map, aln1_int_keys = aln1.getIntMap()
    aln1_int_map = Alignment(aln1_int_map, MolType=moltype)

    #Same for the second alignment, with an explicit prefix (presumably so
    #that its generated IDs cannot collide with those of aln1 - confirm
    #against getIntMap's default prefix).
    aln2 = Alignment(aln2, MolType=moltype)
    aln2_int_map, aln2_int_keys = aln2.getIntMap(prefix='seqn_')
    aln2_int_map = Alignment(aln2_int_map, MolType=moltype)

    #Merge both lookups into one table covering every generated ID.
    aln1_int_keys.update(aln2_int_keys)

    #Create Clustalw app.
    app = Clustalw(InputHandler='_input_as_multiline_string',
        params=params,
        SuppressStderr=True)
    app.Parameters['-align'].off()
    app.Parameters['-infile'].off()
    app.Parameters['-profile'].on()

    profile1 = app.Parameters['-profile1']
    profile2 = app.Parameters['-profile2']

    #Each temp file is guarded by its own try/finally so that nothing is left
    #behind if a later step fails.
    profile1.on(app._tempfile_as_multiline_string(aln1_int_map.toFasta()))
    try:
        profile2.on(
            app._tempfile_as_multiline_string(aln2_int_map.toFasta()))
        try:
            #All input is supplied through the profile parameters, so the
            #app is called without data.
            res = app()
            try:
                #Read the whole result into memory before it is cleaned up.
                alignment = dict(ClustalParser(res['Align'].readlines()))
            finally:
                res.cleanUp()
        finally:
            remove(profile2.Value)
    finally:
        remove(profile1.Value)

    #Translate the generated IDs in the output back to the original IDs.
    new_alignment = dict(
        (aln1_int_keys[k], v) for k, v in alignment.items())
    return Alignment(new_alignment, MolType=moltype)
Пример #2
0
def align_two_alignments(aln1, aln2, moltype, params=None):
    """Returns an Alignment object from two existing Alignments.

    The two inputs are given short generated IDs, dumped as FASTA via the
    controller's _tempfile_as_multiline_string helper and supplied to
    Clustalw through the -profile1 and -profile2 parameters (with -profile
    on).  The returned Alignment carries the original sequence IDs.

    aln1, aln2: cogent.core.alignment.Alignment objects, or data that can be
    used to build them.

    moltype: used as MolType for every Alignment constructed here.

    params: dict of parameters to pass in to the Clustal app controller.

    Cleanup of the temporary profile files and of the application result
    is performed in finally clauses, so it also happens when the run or
    the parsing of the output fails.
    """
    #Create Alignment object from aln1 and relabel it with generated IDs.
    #aln1_int_keys is used further down to map generated IDs back to the
    #original ones.
    aln1 = Alignment(aln1,MolType=moltype)
    aln1_int_map, aln1_int_keys = aln1.getIntMap()
    aln1_int_map = Alignment(aln1_int_map,MolType=moltype)

    #Create Alignment object from aln2; a separate prefix is requested for
    #its generated IDs (presumably to keep them distinct from aln1's).
    aln2 = Alignment(aln2,MolType=moltype)
    aln2_int_map, aln2_int_keys = aln2.getIntMap(prefix='seqn_')
    aln2_int_map = Alignment(aln2_int_map,MolType=moltype)

    #Update aln1_int_keys with aln2_int_keys so one dict resolves all IDs
    aln1_int_keys.update(aln2_int_keys)

    #Create Clustalw app.
    app = Clustalw(InputHandler='_input_as_multiline_string',
        params=params,
        SuppressStderr=True)
    app.Parameters['-align'].off()
    app.Parameters['-infile'].off()
    app.Parameters['-profile'].on()

    profile1 = app.Parameters['-profile1']
    profile2 = app.Parameters['-profile2']

    #Add aln1_int_map as profile1; from here on its temp file must always
    #be removed, whatever happens next.
    profile1.on(app._tempfile_as_multiline_string(aln1_int_map.toFasta()))
    try:
        #Add aln2_int_map as profile2, guarded the same way.
        profile2.on(
            app._tempfile_as_multiline_string(aln2_int_map.toFasta()))
        try:
            #Input comes from the profile parameters, so no data is passed.
            res = app()
            try:
                #Get alignment as dict out of results (fully read here, so
                #it stays valid after the result is cleaned up).
                alignment = dict(ClustalParser(res['Align'].readlines()))
            finally:
                res.cleanUp()
        finally:
            remove(profile2.Value)
    finally:
        remove(profile1.Value)

    #Make new dict mapping original IDs
    new_alignment = dict(
        (aln1_int_keys[k],v) for k,v in alignment.items())
    #Create an Alignment object from alignment dict
    return Alignment(new_alignment,MolType=moltype)
Пример #3
0
def add_seqs_to_alignment(seqs, aln, moltype, params=None):
    """Returns an Alignment object from seqs and existing Alignment.

    The existing alignment is supplied to Clustalw as -profile1 and the new
    sequences as -profile2, with the -sequences option switched on.  Both are
    relabelled with short generated IDs for the run; the original IDs are
    restored on the returned Alignment.

    seqs: a cogent.core.alignment.SequenceCollection object, or data that can
    be used to build one.

    aln: a cogent.core.alignment.Alignment object, or data that can be used to
    build one

    moltype: passed as MolType to every collection/alignment built here.

    params: dict of parameters to pass in to the Clustal app controller.

    The temporary profile files and the application result are cleaned up
    even when running the application or parsing its output raises.
    """
    # Build the sequence collection and relabel it with generated IDs;
    # seq_int_keys is used below to look up the original ID for each
    # generated ID.
    seq_collection = SequenceCollection(seqs, MolType=moltype)
    seq_int_map, seq_int_keys = seq_collection.getIntMap()
    seq_int_map = SequenceCollection(seq_int_map, MolType=moltype)

    # Same for the existing alignment, with an explicit prefix (presumably so
    # its generated IDs cannot collide with those of seqs - confirm against
    # getIntMap's default prefix).
    aln = Alignment(aln, MolType=moltype)
    aln_int_map, aln_int_keys = aln.getIntMap(prefix="seqn_")
    aln_int_map = Alignment(aln_int_map, MolType=moltype)

    # Merge both lookups into one table covering every generated ID.
    seq_int_keys.update(aln_int_keys)

    # Create Clustalw app.
    app = Clustalw(InputHandler="_input_as_multiline_string", params=params, SuppressStderr=True)
    app.Parameters["-align"].off()
    app.Parameters["-infile"].off()
    app.Parameters["-sequences"].on()

    profile1 = app.Parameters["-profile1"]
    profile2 = app.Parameters["-profile2"]

    # Each temp file is guarded by its own try/finally so that nothing is
    # left behind if a later step fails.
    # Add aln_int_map as profile1
    profile1.on(app._tempfile_as_multiline_string(aln_int_map.toFasta()))
    try:
        # Add seq_int_map as profile2
        profile2.on(app._tempfile_as_multiline_string(seq_int_map.toFasta()))
        try:
            # Input comes from the profile parameters, so no data is passed.
            res = app()
            try:
                # Read the whole result into memory before it is cleaned up.
                alignment = dict(ClustalParser(res["Align"].readlines()))
            finally:
                res.cleanUp()
        finally:
            remove(profile2.Value)
    finally:
        remove(profile1.Value)

    # Translate the generated IDs in the output back to the original IDs.
    new_alignment = dict((seq_int_keys[k], v) for k, v in alignment.items())
    return Alignment(new_alignment, MolType=moltype)
Пример #4
0
    def __call__(self, data=None, remove_tmp=True):
        """Run the application with the specified kwargs on data

            data: anything that can be cast into a string or written out to
                a file. Usually either a list of things or a single string or
                number. input_handler will be called on this data before it
                is passed as part of the command-line argument, so by creating
                your own input handlers you can customize what kind of data
                you want your application to accept

            remove_tmp: if True, removes the temporary input file (if one
                was created) once the application has run successfully

            Returns a CommandLineAppResult built from the file holding the
            redirected stdout, the stderr file (None when SuppressStderr is
            set), the exit status and the paths from _get_result_paths(data).

            Raises AssertionError when HaltExec is set (the command is not
            run) and ApplicationError when _accept_exit_status rejects the
            exit status.

            NOTE: Override of the base class to handle redirected output
        """
        input_handler = self.InputHandler
        suppress_stderr = self.SuppressStderr

        # stdout of the application is redirected into this temp file
        outfile = self.getTmpFilename(self.TmpDir)
        self._outfile = outfile

        if suppress_stderr:
            errfile = FilePath("/dev/null")
        else:
            errfile = FilePath(self.getTmpFilename(self.TmpDir))
        if data is None:
            input_arg = ""
        else:
            input_arg = getattr(self, input_handler)(data)

        # Build up the command, consisting of a BaseCommand followed by
        # input and output (file) specifications; empty pieces are dropped
        command = self._command_delimiter.join(
            filter(None, [self.BaseCommand, str(input_arg), ">", str(outfile), "2>", str(errfile)])
        )
        if self.HaltExec:
            # Call form of raise: valid on both Python 2 and Python 3
            raise AssertionError("Halted exec with command:\n" + command)
        # The return value of system is a 16-bit number containing the signal
        # number that killed the process, and then the exit status.
        # We only want to keep the exit status so do a right bitwise shift to
        # get rid of the signal number byte
        exit_status = system(command) >> 8

        # Determine if error should be raised due to exit status of
        # application
        # NOTE(review): raising here skips the input-file cleanup below, so a
        # temporary input file is left behind on failure - confirm intended.
        if not self._accept_exit_status(exit_status):
            raise ApplicationError(
                "Unacceptable application exit status: %s, command: %s" % (
                    str(exit_status),
                    command,
                )
            )

        # The open handles are handed over to the result object
        out = open(outfile, "r")

        err = None
        if not suppress_stderr:
            err = open(errfile, "r")

        result = CommandLineAppResult(out, err, exit_status, result_paths=self._get_result_paths(data))

        # Clean up the input file if one was created
        if remove_tmp:
            if self._input_filename:
                remove(self._input_filename)
                self._input_filename = None

        return result
Пример #5
0
    def __call__(self, data=None, remove_tmp=True):
        """Run the application with the specified kwargs on data

            data: anything that can be cast into a string or written out to
                a file. Usually either a list of things or a single string or
                number. input_handler will be called on this data before it
                is passed as part of the command-line argument, so by creating
                your own input handlers you can customize what kind of data
                you want your application to accept

            remove_tmp: if True, the temporary input file (when one was
                created) is removed after a successful run

            Returns: a CommandLineAppResult wrapping the opened stdout file,
                the opened stderr file (None if SuppressStderr is set), the
                exit status and the result of _get_result_paths(data).

            Raises: AssertionError if HaltExec is set (nothing is executed);
                ApplicationError if _accept_exit_status does not accept the
                exit status.

            NOTE: Override of the base class to handle redirected output
        """
        input_handler = self.InputHandler
        suppress_stderr = self.SuppressStderr

        # Temp file that receives the application's stdout
        outfile = self.getTmpFilename(self.TmpDir)
        self._outfile = outfile

        if suppress_stderr:
            errfile = FilePath('/dev/null')
        else:
            errfile = FilePath(self.getTmpFilename(self.TmpDir))
        if data is None:
            input_arg = ''
        else:
            input_arg = getattr(self, input_handler)(data)

        # Build up the command, consisting of a BaseCommand followed by
        # input and output (file) specifications; filter(None, ...) drops
        # empty pieces such as a missing input argument
        command = self._command_delimiter.join(filter(None,
            [self.BaseCommand,str(input_arg),'>',str(outfile),'2>',
                str(errfile)]))
        if self.HaltExec:
            # raise with the call form so the code parses on Python 2 and 3
            raise AssertionError("Halted exec with command:\n" + command)
        # The return value of system is a 16-bit number containing the signal
        # number that killed the process, and then the exit status.
        # We only want to keep the exit status so do a right bitwise shift to
        # get rid of the signal number byte
        exit_status = system(command) >> 8

        # Determine if error should be raised due to exit status of
        # application
        # NOTE(review): this raise happens before the input-file cleanup at
        # the end, so that file is kept on failure - confirm intended.
        if not self._accept_exit_status(exit_status):
            raise ApplicationError(
                'Unacceptable application exit status: %s, command: %s'
                % (str(exit_status),command))

        # Ownership of the open handles passes to the result object
        out = open(outfile, "r")

        err = None
        if not suppress_stderr:
            err = open(errfile, "r")

        result =  CommandLineAppResult(out,err,exit_status,
            result_paths=self._get_result_paths(data))

        # Clean up the input file if one was created
        if remove_tmp:
            if self._input_filename:
                remove(self._input_filename)
                self._input_filename = None

        return result