Python Sequence.readSequences примеры использования

Язык программирования: Python

Пространство имен/Пакет: sequences

Класс/Тип: Sequence

Метод/Функция: readSequences

Примеров на hotexamples.com: 4

Python Sequence.readSequences - 4 примера найдено. Это лучшие примеры Python кода для sequences.Sequence.readSequences, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Sequence(17)

outputSequencesToFasta(5)

readSequences(4)

combineSequenceFilesBySpecies(2)

import_from_data(1)

Пример #1

Показать файл

    def load_from_alignment_file(self, filename):
        """
        Read the sequences stored in ``filename`` and keep them on this object.

        The file is handed to ``Sequence.readSequences`` as-is; it does not
        need to be an aligned file. The path is stored in ``self.seq_file``
        and the parsed result in ``self.sequences``.
        """
        # Record the path first so it is set even if reading fails.
        self.seq_file = filename

        parsed = Sequence.readSequences(filename)
        self.sequences = parsed

Пример #2

Показать файл

    def load_scores(self, gene_family):
        """
        Fill ``gene_family.scores`` from an FSA alignment of its sequences.

        The family's sequences are written to ``<seq_file>.dealigned.fa``,
        the external ``fsa`` program is run on that file with its output
        redirected to ``<seq_file>.fsa.fa``, and the result is read back and
        passed to ``Distances.getPairwisePctID``. Nothing is returned.

        An existing ``.fsa.fa`` file is reused only when ``"use_cache"`` is
        present in ``self.other_args`` and the file is larger than 10 bytes;
        otherwise ``fsa`` is run again.
        """
        # Function-scope import so the method does not depend on file-level
        # imports; ``pipes.quote`` is the same helper on Python 2.
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        dealignedfile = gene_family.seq_file + ".dealigned.fa"
        Sequence.outputSequencesToFasta(gene_family.sequences, dealignedfile)

        fsafile = gene_family.seq_file + ".fsa.fa"

        # Quote both paths: the command goes through a shell, so a path with
        # spaces or shell metacharacters would otherwise break the call or
        # redirect the output somewhere else. Plain paths are left unchanged.
        cmd = "fsa " + quote(dealignedfile) + " > " + quote(fsafile)

        print("EXEC " + cmd)

        # Same short-circuit order as before: the size is only queried once
        # the file is known to exist and caching was requested.
        cache_usable = (os.path.isfile(fsafile)
                        and "use_cache" in self.other_args
                        and os.path.getsize(fsafile) > 10)

        if cache_usable:
            print("Actually, file exists and we'll use it.")
        else:
            os.system(cmd)

        seqs = Sequence.readSequences(fsafile)

        gene_family.scores = Distances.getPairwisePctID(sequences=seqs,
                                                        verbose=False,
                                                        run_nw_algorithm=False)

Пример #3

Показать файл

# Here's an annoying problem: when having multiple input files, some (distinct)
# genes will have the same name (this happens with simphy).
# So here we copy the alignment files, but rename the genes with their file index.
newinfiles = ""
files_list = infiles.split(",")

# Maps each resulting input file name to the original file it came from.
newfile_to_old_file = {}

if rename_genes:
    print("Copying alignment files, ensuring gene name uniqueness...")
    renamed_files = []
    for i, f in enumerate(files_list):
        # Take the stem of the base name instead of str.replace(ext, ...):
        # replace() rewrites every occurrence of the extension text, and with
        # an empty extension it inserts the suffix between all characters.
        stem = os.path.splitext(os.path.basename(f))[0]
        newfile = join(workdir, stem + "_" + str(i) + ".fa")

        sequences = Sequence.readSequences(f)
        Sequence.outputSequencesToFasta(sequences, newfile, str(i), True)

        renamed_files.append(newfile)
        newfile_to_old_file[newfile] = f

    # From here on the renamed copies replace the original input list.
    newinfiles = ",".join(renamed_files)
    infiles = newinfiles
else:
    # No renaming requested: every file maps to itself.
    for f in files_list:
        newfile_to_old_file[f] = f

#############################################################################################

Пример #4

Показать файл

    def predict_orthologs(self, files, workdir, speciestree_file):
        """
        Cluster the genes found in ``files`` with the external orthomcl-pipeline
        and write the clusters and pairwise relations under ``workdir``.

        Parameters:
            files: iterable of sequence file paths; each is read with
                ``Sequence.readSequences``.
            workdir: directory that receives the ``in``/``out`` working
                directories and the two result files.
            speciestree_file: not used by this method.

        Reads ``self.other_args["species_separator"]`` and
        ``self.other_args["species_index"]`` to extract a species from a gene
        name, and the optional ``"seqtype"`` entry (``"AA"`` disables the
        conversion to amino acids when writing the per-species files).

        Side effects:
            * ``self.cached_clusters`` is filled on the first run and reused
              on later calls instead of running the pipeline again.
            * ``self.clusters_filenames`` / ``self.relations_filenames`` are
              set to one-element lists naming the output files.
            * The relations file holds records of the form
              ``name1<TAB>name2<TAB>Orthologs|Paralogs`` separated by ``;;``.

        Nothing is returned.
        """
        workdir_seqs = join(workdir, "in")
        workdir_out = join(workdir, "out")

        allseqs = []
        for f in files:
            allseqs.extend(Sequence.readSequences(f))

        #one file per species, see OMA comments above
        if not self.cached_clusters:
            seqs_by_species = Sequence.combineSequenceFilesBySpecies(
                files, self.other_args["species_separator"],
                int(self.other_args["species_index"]))

            isdna = True
            if "seqtype" in self.other_args and self.other_args[
                    "seqtype"] == "AA":
                print("Input type set to AA")
                isdna = False

            if not os.path.exists(workdir_seqs):
                os.mkdir(workdir_seqs)
            for key in seqs_by_species:
                outfile = join(workdir_seqs, key + ".fasta")
                Sequence.outputSequencesToFasta(sequences=seqs_by_species[key],
                                                filename=outfile,
                                                name_suffix="",
                                                aligned=False,
                                                convertToAA=isdna,
                                                name_prefix="")

            cmd = "/u/lafonman/src/orthomcl-pipeline/bin/orthomcl-pipeline -i " + workdir_seqs + " -o " + workdir_out + " -m /u/lafonman/src/orthomcl-pipeline/orthomcl.conf --nocompliant --yes"
            print("EXEC " + cmd)
            os.system(cmd)

            seen_genes = set()
            clusters = []
            clfile = join(workdir_out, "groups/groups.txt")
            # Each non-empty line is split at ":"; the part after it is a
            # whitespace-separated list of "<something>|<gene name>" entries.
            with open(clfile, 'r') as groups:
                for line in groups:
                    line = line.replace("\n", "")
                    if line != "":
                        gz = line.split(":")[1].split()
                        cluster = set()
                        for g in gz:
                            gname = g.split("|")[1]
                            cluster.add(gname)
                            seen_genes.add(gname)
                        clusters.append(cluster)

            # Genes absent from the groups file become singleton clusters.
            for s in allseqs:
                name = s.name
                if name not in seen_genes:
                    clusters.append({name})

            self.cached_clusters = clusters
        else:
            print("USING CACHED CLUSTERS")
            clusters = self.cached_clusters

        # NOTE: clusters are written for all genes; they are not restricted
        # to a single gene family here.

        self.clusters_filenames = [join(workdir, "orthomcl.clusters")]
        self.relations_filenames = [join(workdir, "orthomcl.relations")]

        write_clusters(self.clusters_filenames[0], clusters)

        def species_of(gene_name):
            # Looked up lazily so the settings are only required when there
            # is at least one pair to classify.
            return gene_name.split(self.other_args["species_separator"])[int(
                self.other_args["species_index"])]

        #output relations
        # Records are collected in a list and joined once at the end. The
        # previous string-building code trimmed the trailing ";;" *before*
        # appending the cross-cluster pairs, which glued the first of those
        # onto the preceding record and left a stray ";;" at the end.
        relations = []
        seen_keys = set()
        for c in clusters:
            for c1 in c:
                for c2 in c:
                    if c1 == c2:
                        continue

                    key1 = c1 + ";;" + c2
                    key2 = c2 + ";;" + c1
                    if key1 in seen_keys or key2 in seen_keys:
                        continue
                    seen_keys.add(key1)
                    seen_keys.add(key2)

                    # Same cluster, different species -> orthologs;
                    # same cluster, same species -> paralogs.
                    sp1 = species_of(c1)
                    sp2 = species_of(c2)
                    relation = "Orthologs" if sp1 != sp2 else "Paralogs"
                    relations.append(c1 + "\t" + c2 + "\t" + relation)

        # Every remaining pair (not sharing a cluster) is written as paralogs.
        for i, first in enumerate(allseqs):
            n1 = first.name
            for second in allseqs[i + 1:]:
                n2 = second.name
                key = n1 + ";;" + n2
                if key not in seen_keys:
                    relations.append(n1 + "\t" + n2 + "\t" + "Paralogs")

        with open(self.relations_filenames[0], 'w') as out:
            out.write(";;".join(relations))