示例#1
0
    def extract_GO_terms_from_emapper_annotation_file(emapper_annotation_file,
                                                      output_file):
        """
        Load the GO-term column of an emapper annotation file and save it.

        A SynDict is constructed from ``emapper_annotation_file`` with
        ``key_index=0`` and ``value_index=5``, tab as the column separator,
        "," as the values separator and "#" as the comments prefix. Its header
        is then replaced and the dictionary is written to ``output_file``.

        :param emapper_annotation_file: passed to SynDict as ``filename``
        :param output_file: destination handed to ``SynDict.write``
        :return: the SynDict built from the annotation file
        """
        # NOTE(review): column index 5 is presumably the GO terms column of
        # the emapper output format this was written for - confirm against
        # the eggNOG-mapper version in use.
        parsing_options = {
            "filename": emapper_annotation_file,
            "key_index": 0,
            "value_index": 5,
            "split_values": True,
            "values_separator": ",",
            "comments_prefix": "#",
            "separator": "\t",
        }
        protein_to_go_terms = SynDict(**parsing_options)

        protein_to_go_terms.header = "#protein_id\tGO_terms"
        protein_to_go_terms.write(output_file, header=True, splited_values=True)

        return protein_to_go_terms
示例#2
0
    def extract_predicted_gene_names_from_emapper_annotation_file(
            emapper_annotation_file, output_file):
        """
        Load the predicted-gene-name column of an emapper annotation file and save it.

        A SynDict is constructed from ``emapper_annotation_file`` with
        ``key_index=0`` and ``value_index=4``, tab as the column separator,
        "," as the values separator and "#" as the comments prefix. Its header
        is then replaced and the dictionary is written to ``output_file``.

        :param emapper_annotation_file: passed to SynDict as ``filename``
        :param output_file: destination handed to ``SynDict.write``
        :return: the SynDict built from the annotation file
        """
        # NOTE(review): column index 4 is presumably the predicted gene name
        # column of the emapper output format this was written for - confirm.
        output_header = "#protein_id\tpredicted_gene_name"

        gene_names_by_protein = SynDict(filename=emapper_annotation_file,
                                        key_index=0, value_index=4,
                                        split_values=True, values_separator=",",
                                        comments_prefix="#", separator="\t")
        gene_names_by_protein.header = output_header
        gene_names_by_protein.write(output_file, header=True, splited_values=True)

        return gene_names_by_protein
示例#3
0
    def count_miRNA_reads(self, alignment_file, gff_file, output_prefix, annotation_file_type="GTF",
                          min_read_fraction_overlap=1.0, feature_type_to_use=None, attribute_type_to_use=None,
                          sample_name=None, stranded=1):
        """
        Count reads per feature and adjust the counts for multimapped reads.

        ``self.count`` is run twice on the same alignment and annotation:
        once with default multimapping handling and once with
        ``count_multimapped_reads=True``. Both count tables are then read
        back and, for every feature present in the first table, the adjusted
        count is computed as::

            ceil(unique + (with_multimapped - unique) / similar_feature_number)

        where ``similar_feature_number`` is the number of ";"-separated items
        in column index 1 of the second table.

        Three files are produced:
            <output_prefix>.no_multimapped_reads.count
            <output_prefix>.with_multimapped_reads.count
            <output_prefix>.all_adjusted_reads.count

        :param alignment_file: alignment passed to ``self.count``
        :param gff_file: annotation passed to ``self.count``
        :param output_prefix: prefix of the three output files
        :param annotation_file_type: forwarded to ``self.count``
        :param min_read_fraction_overlap: forwarded to ``self.count``
        :param feature_type_to_use: forwarded to ``self.count``
        :param attribute_type_to_use: forwarded to ``self.count``
        :param sample_name: column label for the adjusted table; if falsy, the
            field with index 6 of the whitespace-split header of the
            multimapped count table is used instead
        :param stranded: forwarded to ``self.count``
        :return: SynDict with the adjusted read count of every feature
        """
        no_multimapped_read_counts = "%s.no_multimapped_reads.count" % output_prefix
        with_multimapped_read_counts = "%s.with_multimapped_reads.count" % output_prefix
        all_adjusted_read_counts = "%s.all_adjusted_reads.count" % output_prefix

        # Options shared by both counting runs; only the multimapping flag differs.
        shared_count_options = dict(annotation_file_type=annotation_file_type,
                                    min_read_fraction_overlap=min_read_fraction_overlap,
                                    feature_type_to_use=feature_type_to_use,
                                    attribute_type_to_use=attribute_type_to_use,
                                    stranded=stranded)

        self.count(alignment_file, gff_file, no_multimapped_read_counts, **shared_count_options)
        self.count(alignment_file, gff_file, with_multimapped_read_counts, count_multimapped_reads=True,
                   **shared_count_options)

        # NOTE(review): column index 6 is presumably the per-sample count column
        # and column index 1 a ";"-separated list with one entry per similar
        # feature (featureCounts-like layout) - confirm against self.count output.
        no_multimapped_read_count_dict = SynDict(filename=no_multimapped_read_counts, comments_prefix="#",
                                                 key_index=0, value_index=6, expression=int, header=True)
        with_multimapped_read_count_dict = SynDict(filename=with_multimapped_read_counts, comments_prefix="#",
                                                   key_index=0, value_index=6, expression=int, header=True)
        similar_feature_number_dict = SynDict(filename=with_multimapped_read_counts, comments_prefix="#", header=True,
                                              key_index=0, value_index=1, expression=lambda s: len(s.split(";")))

        # Fall back to the sample label stored in the count table header.
        sample_label = sample_name if sample_name else similar_feature_number_dict.header.split()[6]

        all_adjusted_read_count_dict = SynDict()
        all_adjusted_read_count_dict.header = ".\t%s" % sample_label

        for feature_id in no_multimapped_read_count_dict:
            # Explicit floats keep true division regardless of the Python version.
            unique_count = float(no_multimapped_read_count_dict[feature_id])
            multimapped_excess = float(with_multimapped_read_count_dict[feature_id]) - unique_count
            similar_feature_number = float(similar_feature_number_dict[feature_id])

            # The multimapped excess is split evenly between similar features
            # and the result is rounded up to a whole read.
            all_adjusted_read_count_dict[feature_id] = int(ceil(unique_count +
                                                                multimapped_excess / similar_feature_number))

        all_adjusted_read_count_dict.write(all_adjusted_read_counts, header=True)

        return all_adjusted_read_count_dict