Пример #1
0
    def check_feature_limits(self):
        """Check the gene's coordinates against its scaffold and its mRNAs.

        Appends a WAError to ``self.wa_errors`` when:

        * the gene start or end is greater than ``self.scaffold_size``;
        * an mRNA child lies on a different strand than the gene (reported
          only once, for the first offending mRNA);
        * the smallest mRNA start is greater than the gene start, or the
          largest mRNA end is smaller than the gene end (the mRNA-derived
          coordinate is passed as ``'expected'``).
        """
        gene_loc = self.f.location

        if gene_loc.start > self.scaffold_size:
            self.wa_errors.append(WAError(WAError.OUTSIDE_SCAFFOLD_START,
                                          self))
        if gene_loc.end > self.scaffold_size:
            self.wa_errors.append(WAError(WAError.OUTSIDE_SCAFFOLD_END, self))

        fmin = None
        fmax = None
        buggy_strand = False
        for child in self.f.sub_features:
            if child.type != "mRNA":
                continue

            # Compare against None explicitly: 0 is a legitimate coordinate
            # and must not be mistaken for "no value seen yet".
            if fmin is None or child.location.start < fmin:
                fmin = child.location.start
            if fmax is None or child.location.end > fmax:
                fmax = child.location.end

            if not buggy_strand and child.location.strand != gene_loc.strand:
                self.wa_errors.append(
                    WAError(WAError.WRONG_GENE_STRAND, self,
                            {'expected': child.location.strand}))
                buggy_strand = True  # Don't report for each mrna

        if fmin is not None and fmin > gene_loc.start:
            self.wa_errors.append(
                WAError(WAError.WRONG_GENE_START, self, {'expected': fmin}))
        if fmax is not None and fmax < gene_loc.end:
            self.wa_errors.append(
                WAError(WAError.WRONG_GENE_END, self, {'expected': fmax}))
Пример #2
0
    def check_sub_features_mrna(self):
        """Flag every direct sub-feature of the gene that is not an mRNA.

        For each such child, an UNEXPECTED_SUB_FEATURE WAError carrying the
        child's first ``ID`` qualifier value and its type is appended to
        ``self.wa_errors``.
        """
        for sub in self.f.sub_features:
            if sub.type == "mRNA":
                continue

            details = {
                'child_id': sub.qualifiers['ID'][0],
                'child_type': sub.type,
            }
            problem = WAError(WAError.UNEXPECTED_SUB_FEATURE, self, details)
            self.wa_errors.append(problem)
Пример #3
0
    def check_sub_features(self):
        """Check a gene that has more than one direct sub-feature.

        Nothing is done when the gene has zero or one sub-feature. Otherwise
        the sub-features are compared pairwise (pairs sharing the same first
        ``ID`` qualifier value are skipped):

        * if no pair overlaps, a MULTIPLE_SUB_FEATURE WAError holding the
          number of children is appended to ``self.wa_errors``;
        * if at least one pair overlaps, ``self.check_multiple_mrnas()`` is
          called.
        """
        children = self.f.sub_features
        if len(children) <= 1:
            return

        found_overlap = False
        for sub1 in children:
            id1 = sub1.qualifiers['ID'][0]
            start1 = sub1.location.start
            end1 = sub1.location.end
            # Compare against the gene's other children (siblings). Iterating
            # sub1's own children here would make the ID guard pointless and
            # would never test sibling overlap.
            for sub2 in children:
                if sub2.qualifiers['ID'][0] == id1:
                    continue
                # Inclusive interval overlap: touching ends count as overlap.
                if (start1 <= sub2.location.end
                        and sub2.location.start <= end1):
                    found_overlap = True
                    break
            if found_overlap:
                break

        if not found_overlap:
            self.wa_errors.append(
                WAError(WAError.MULTIPLE_SUB_FEATURE, self,
                        {'num_children': len(children)}))
        else:
            self.check_multiple_mrnas()
Пример #4
0
    def validate_genes(self):
        """Parse ``self.in_file`` with ``GFF.parse`` and process each feature.

        For every feature of every record:

        * a feature whose ``status`` qualifier's first value is "deleted"
          (case-insensitive) is wrapped in a Gene and only registered in
          ``self.all_genes``;
        * a non-deleted feature of type "gene" is wrapped in a Gene,
          registered in ``self.all_genes``, counted in the GO id / group /
          seen-once statistics, has its ``wa_errors`` collected into
          ``self.wa_errors``, and is tracked in ``self.splitted_genes``
          (by part) and ``self.duplicated_genes`` (by allele). When an
          earlier gene already occupies the same slot, a PART_SAME or
          ALLELE_SAME GeneError is appended to ``gene.errors``;
        * any other feature yields an UNEXPECTED_FEATURE WAError.
        """
        # Context manager: the handle is closed even if parsing or Gene
        # construction raises part-way through the file.
        with open(self.in_file) as in_handle:
            for rec in GFF.parse(in_handle):
                scaf_length = self.scaf_lengths[rec.id]
                for f in rec.features:
                    quals = f.qualifiers
                    is_deleted = bool(
                        'status' in quals and quals['status']
                        and quals['status'][0].lower() == "deleted")

                    if not is_deleted and f.type != "gene":
                        # NOTE(review): built without no_group/split_users,
                        # exactly as before -- presumably Gene has defaults
                        # for them; confirm against Gene's signature.
                        fake_gene = Gene(f, rec.id, scaf_length,
                                         self.allowed_groups, self.group_tags)
                        self.wa_errors.append(
                            WAError(WAError.UNEXPECTED_FEATURE, fake_gene))
                        continue

                    gene = Gene(f, rec.id, scaf_length,
                                self.allowed_groups, self.group_tags,
                                self.no_group, self.split_users)
                    self.all_genes[gene.wa_id] = gene

                    # Deleted features are registered but not validated.
                    if is_deleted:
                        continue

                    # Count number of genes with goid
                    if gene.has_goid:
                        self.genes_with_goid += 1

                    # Collect stats on groups
                    for g in gene.groups:
                        self.groups_stats[g] = self.groups_stats.get(g, 0) + 1

                    new_part = gene.part
                    new_allele = gene.allele

                    # Collect wa_errors
                    self.wa_errors.extend(gene.wa_errors)

                    if not new_part and not new_allele:
                        self.genes_seen_once += 1

                    # Keep track of split genes, keyed by display id (plus
                    # allele when present), then by part.
                    if new_part:
                        part_gene_key = gene.display_id
                        if new_allele:
                            part_gene_key = (gene.display_id + ", allele "
                                             + new_allele)
                        parts = self.splitted_genes.setdefault(
                            part_gene_key, {})
                        if new_part in parts:
                            identical = parts[new_part]
                            gene.errors.append(
                                GeneError(
                                    GeneError.PART_SAME, gene, {
                                        'other_name': identical.display_id,
                                        'other_scaff': identical.scaffold,
                                        'other_start':
                                        identical.f.location.start,
                                        'other_end': identical.f.location.end
                                    }))
                        # The latest gene replaces any earlier one in the slot.
                        parts[new_part] = gene

                    # Keep track of duplicated genes, keyed by display id,
                    # then by allele.
                    if new_allele:
                        alleles = self.duplicated_genes.setdefault(
                            gene.display_id, {})
                        if new_allele in alleles:
                            identical = alleles[new_allele]
                            # Same allele is only an error for the same part.
                            if identical.part == new_part:
                                gene.errors.append(
                                    GeneError(
                                        GeneError.ALLELE_SAME, gene, {
                                            'other_name':
                                            identical.display_id,
                                            'other_scaff': identical.scaffold,
                                            'other_start':
                                            identical.f.location.start,
                                            'other_end':
                                            identical.f.location.end
                                        }))
                        alleles[new_allele] = gene