Exemplo n.º 1
0
    def align(self, input_):
        """Align protein sequences by running the clustalw executable.

        The input is passed through ``self._fix_input``, written to a FASTA
        file, aligned with clustalw (protein mode, BLOSUM pairwise matrix,
        FASTA output) and the parsed result is passed through
        ``self._fix_output`` before being wrapped in an ``Alignment``.

        :param input_: the sequences to align, in whatever form
            ``self._fix_input`` accepts.
        :return: an ``Alignment`` built from the parsed clustalw output.
        :raises InitError: if ``self.clustalw_exe`` is not set.
        :raises RuntimeError: if clustalw exits with a non-zero status; the
            message carries the process' stderr and the input.
        """
        import shutil

        if self.clustalw_exe is None:
            raise InitError("clustalw executable is not set")

        input_ = self._fix_input(input_)

        # A private temporary directory replaces tempfile.mktemp(), which only
        # returns a name and leaves a window for another process to claim it.
        work_dir = tempfile.mkdtemp()
        try:
            input_path = os.path.join(work_dir, 'input.fasta')
            output_path = os.path.join(work_dir, 'output.fasta')

            write_fasta(input_path, input_)

            cmd = [self.clustalw_exe, '-TYPE=PROTEIN', '-OUTPUT=FASTA',
                   '-PWMATRIX=BLOSUM', '-OUTFILE=%s' % output_path, '-INFILE=%s' % input_path]

            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting; wait() alone can
            # deadlock once the child fills a pipe buffer.
            _, stderr_data = p.communicate()

            if p.returncode != 0:
                # Decode leniently so odd bytes in stderr cannot hide the
                # actual failure behind a UnicodeDecodeError.
                raise RuntimeError("%s for %s" % (stderr_data.decode('ascii', 'replace'),
                                                  str(input_)))

            return Alignment(self._fix_output(parse_fasta(output_path)))
        finally:
            # Best-effort cleanup of the input, the output and anything else
            # the tool may have written next to them.
            shutil.rmtree(work_dir, ignore_errors=True)
Exemplo n.º 2
0
    def align(self, input_):
        """Align protein sequences by running the clustalw executable.

        The input is written to a FASTA file, aligned with clustalw (protein
        mode, BLOSUM pairwise matrix, FASTA output) and the parsed output is
        wrapped in an ``Alignment``.

        :param input_: the sequences to align, in whatever form
            ``write_fasta`` accepts.
        :return: an ``Alignment`` built from the parsed clustalw output.
        :raises InitError: if ``self.clustalw_exe`` is not set.
        :raises RuntimeError: if clustalw exits with a non-zero status; the
            message carries the process' stderr and the input.
        """
        import shutil

        if self.clustalw_exe is None:
            raise InitError("clustalw executable is not set")

        # A private temporary directory replaces tempfile.mktemp(), which only
        # returns a name and leaves a window for another process to claim it.
        work_dir = tempfile.mkdtemp()
        try:
            input_path = os.path.join(work_dir, 'input.fasta')
            output_path = os.path.join(work_dir, 'output.fasta')

            write_fasta(input_path, input_)

            cmd = [self.clustalw_exe, '-TYPE=PROTEIN', '-OUTPUT=FASTA',
                   '-PWMATRIX=BLOSUM', '-OUTFILE=%s' % output_path, '-INFILE=%s' % input_path]

            p = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting; wait() alone can
            # deadlock once the child fills a pipe buffer.
            _, stderr_data = p.communicate()

            if p.returncode != 0:
                # Decode leniently so odd bytes in stderr cannot hide the
                # actual failure behind a UnicodeDecodeError.
                raise RuntimeError("%s for %s" % (stderr_data.decode('ascii', 'replace'),
                                                  str(input_)))

            return Alignment(parse_fasta(output_path))
        finally:
            # Best-effort cleanup of the input, the output and anything else
            # the tool may have written next to them.
            shutil.rmtree(work_dir, ignore_errors=True)
Exemplo n.º 3
0
    def align(self,
              template_sequence,
              template_secstr,
              target_sequence,
              gap_open=-13.0,
              gap_extend=-0.4,
              modifier=3.0):
        """Align a target sequence to a template sequence using kmad.

        The template's secondary structure is first passed through
        ``self._remove_bulges`` for 'H' and 'E' (with argument 3), both
        sequences are converted with ``self._to_kmad_sequence``, written to a
        FASTA file and handed to ``self._run_kmad``. The result is read back
        from the output path with the ``_al`` suffix appended.

        :param template_sequence: the template sequence; must not be empty.
        :param template_secstr: secondary structure of the template; must
            have the same length as ``template_sequence`` after bulge removal.
        :param target_sequence: the sequence to align to the template.
        :param gap_open: passed on to ``self._run_kmad``.
        :param gap_extend: passed on to ``self._run_kmad``.
        :param modifier: passed on to ``self._run_kmad``.
        :return: a ``TargetTemplateAlignment`` of the aligned target and
            template.
        :raises ValueError: if the template sequence is empty or its length
            differs from that of the secondary structure.
        """
        import shutil

        _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence,
                                                   template_secstr,
                                                   target_sequence))

        # Prevent kmad from adding insertions in bulges, replace those.
        template_secstr = self._remove_bulges(template_secstr, 'H', 3)
        template_secstr = self._remove_bulges(template_secstr, 'E', 3)

        if len(template_sequence) <= 0:
            raise ValueError("empty template sequence")
        if len(template_sequence) != len(template_secstr):
            raise ValueError(
                "template sequence ({}) has different length than secondary structure ({})"
                .format(len(template_sequence), len(template_secstr)))

        kmad_template_sequence = self._to_kmad_sequence(
            template_sequence, template_secstr)
        kmad_target_sequence = self._to_kmad_sequence(target_sequence)

        # A private temporary directory replaces tempfile.mktemp(), which only
        # returns a name and leaves a window for another process to claim it.
        work_dir = tempfile.mkdtemp()
        try:
            input_path = os.path.join(work_dir, 'input.fasta')
            output_path = os.path.join(work_dir, 'output')

            write_fasta(input_path, {
                'target': kmad_target_sequence,
                'template': kmad_template_sequence
            })

            self._run_kmad(input_path, output_path, gap_open, gap_extend,
                           modifier)

            # The alignment is read from the output path plus '_al'.
            aligned = parse_fasta(output_path + '_al')

            _log.debug("kmad aligned\n{}\n{}".format(aligned['target'],
                                                     aligned['template']))
        finally:
            # Removing the whole directory also cleans up the '_al' file when
            # kmad fails half-way, which a per-path check would miss.
            shutil.rmtree(work_dir, ignore_errors=True)

        alignment = TargetTemplateAlignment(aligned['target'],
                                            aligned['template'])
        return alignment
Exemplo n.º 4
0
    def align(self, template_sequence, template_secstr, target_sequence,
              gap_open=-13.0, gap_extend=-0.4, modifier=3.0):
        """Align a target sequence to a template sequence using kmad.

        The template's secondary structure is first passed through
        ``self._remove_bulges`` for 'H' and 'E' (with argument 3), both
        sequences are converted with ``self._to_kmad_sequence``, written to a
        FASTA file and handed to ``self._run_kmad``. The result is read back
        from the output path with the ``_al`` suffix appended.

        :param template_sequence: the template sequence; must not be empty.
        :param template_secstr: secondary structure of the template; must
            have the same length as ``template_sequence`` after bulge removal.
        :param target_sequence: the sequence to align to the template.
        :param gap_open: passed on to ``self._run_kmad``.
        :param gap_extend: passed on to ``self._run_kmad``.
        :param modifier: passed on to ``self._run_kmad``.
        :return: a ``TargetTemplateAlignment`` of the aligned target and
            template.
        :raises ValueError: if the template sequence is empty or its length
            differs from that of the secondary structure.
        """
        import shutil

        _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence, template_secstr, target_sequence))

        # Prevent kmad from adding insertions in bulges, replace those.
        template_secstr = self._remove_bulges(template_secstr, 'H', 3)
        template_secstr = self._remove_bulges(template_secstr, 'E', 3)

        if len(template_sequence) <= 0:
            raise ValueError("empty template sequence")
        if len(template_sequence) != len(template_secstr):
            raise ValueError("template sequence ({}) has different length than secondary structure ({})"
                             .format(len(template_sequence), len(template_secstr)))

        kmad_template_sequence = self._to_kmad_sequence(template_sequence, template_secstr)
        kmad_target_sequence = self._to_kmad_sequence(target_sequence)

        # A private temporary directory replaces tempfile.mktemp(), which only
        # returns a name and leaves a window for another process to claim it.
        work_dir = tempfile.mkdtemp()
        try:
            input_path = os.path.join(work_dir, 'input.fasta')
            output_path = os.path.join(work_dir, 'output')

            write_fasta(input_path, {'target': kmad_target_sequence,
                                     'template': kmad_template_sequence})

            self._run_kmad(input_path, output_path, gap_open, gap_extend, modifier)

            # The alignment is read from the output path plus '_al'.
            aligned = parse_fasta(output_path + '_al')

            _log.debug("kmad aligned\n{}\n{}".format(aligned['target'], aligned['template']))
        finally:
            # Removing the whole directory also cleans up the '_al' file when
            # kmad fails half-way, which a per-path check would miss.
            shutil.rmtree(work_dir, ignore_errors=True)

        alignment = TargetTemplateAlignment(aligned['target'], aligned['template'])
        return alignment
Exemplo n.º 5
0
    args = arg_parser.parse_args()


    tmp_dir = tempfile.mkdtemp()
    model_storage.model_dir = tmp_dir

    final_output_dir = settings.MODEL_DIR
    if args.output_dir:
        final_output_dir = args.output_dir

    if not os.path.isdir(final_output_dir):
        raise ValueError("Not a directory: {}".format(final_output_dir))

    try:
        sequence = list(parse_fasta(args.fasta).values())[0]

        species_id = args.species.upper()

        if args.template:
            pdbid, chain_id = args.template.split('_')
            template_id = TemplateID(pdbid, chain_id)
        else:
            template_id = None

        domain_alignments = domain_aligner.get_domain_alignments(sequence, args.position, template_id)
        _log.info("{} domain alignments".format(len(domain_alignments)))

        ts = [ModelThread(sequence, species_id, ali, final_output_dir) for ali in domain_alignments]
        for t in ts:
            t.start()
Exemplo n.º 6
0
def pick_random_sequences(n):
    """Return ``n`` randomly chosen entries from the FASTA file at SPROT_FASTA.

    :param n: number of distinct entries to pick.
    :return: a dict mapping each picked key to its parsed sequence.
    :raises ValueError: if ``n`` is negative or larger than the number of
        entries (raised by ``random.sample``).
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample() needs a real sequence: passing a dict keys view is
    # deprecated since Python 3.9 and raises TypeError from 3.11 on.
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}
Exemplo n.º 7
0
    args = arg_parser.parse_args()


    tmp_dir = tempfile.mkdtemp()
    model_storage.model_dir = tmp_dir

    final_output_dir = settings.MODEL_DIR
    if args.output_dir:
        final_output_dir = args.output_dir

    if not os.path.isdir(final_output_dir):
        raise ValueError("Not a directory: {}".format(final_output_dir))

    try:
        sequence = parse_fasta(args.fasta).values()[0]

        species_id = args.species.upper()

        if args.template:
            pdbid, chain_id = args.template.split('_')
            template_id = TemplateID(pdbid, chain_id)
        else:
            template_id = None

        domain_alignments = domain_aligner.get_domain_alignments(sequence, args.position, template_id)
        _log.info("{} domain alignments".format(len(domain_alignments)))

        ts = [ModelThread(sequence, species_id, ali, final_output_dir) for ali in domain_alignments]
        for t in ts:
            t.start()
Exemplo n.º 8
0
        '--template', help="underscore separated template pdbid and chain")

    args = arg_parser.parse_args()

    tmp_dir = tempfile.mkdtemp()
    model_storage.model_dir = tmp_dir

    final_output_dir = settings.MODEL_DIR
    if args.output_dir:
        final_output_dir = args.output_dir

    if not os.path.isdir(final_output_dir):
        raise ValueError("Not a directory: {}".format(final_output_dir))

    try:
        sequence = list(parse_fasta(args.fasta).values())[0]

        species_id = args.species.upper()

        if args.template:
            pdbid, chain_id = args.template.split('_')
            template_id = TemplateID(pdbid, chain_id)
        else:
            template_id = None

        domain_alignments = domain_aligner.get_domain_alignments(
            sequence, args.position, template_id)
        _log.info("{} domain alignments".format(len(domain_alignments)))

        ts = [
            ModelThread(sequence, species_id, ali, final_output_dir)
Exemplo n.º 9
0
def pick_random_sequences(n):
    """Return ``n`` randomly chosen entries from the FASTA file at SPROT_FASTA.

    :param n: number of distinct entries to pick.
    :return: a dict mapping each picked key to its parsed sequence.
    :raises ValueError: if ``n`` is negative or larger than the number of
        entries (raised by ``random.sample``).
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample() needs a real sequence: passing a dict keys view is
    # deprecated since Python 3.9 and raises TypeError from 3.11 on.
    keys = random.sample(list(sprot_sequences), n)

    return {key: sprot_sequences[key] for key in keys}