def create_blast_inputs(region: secmet.Region) -> Tuple[List[str], List[str]]:
    """ Builds the fasta headers and sequences for every CDS feature of a region

        Each header is pipe-separated, in the form:
            input|c<region number>|<start>-<end>|<strand>|<accession>|<product>
        where the strand is "+" for a CDS with a strand of 1 and "-" for
        any other strand value.

        Arguments:
            region: the secmet.Region to pull data from

        Returns:
            a tuple of:
                a list of CDS names
                a matching list of CDS sequences
    """
    headers: List[str] = []
    translations: List[str] = []
    for feature in region.cds_children:
        # anything other than an explicit forward strand is reported as reverse
        strand_symbol = "+" if feature.strand == 1 else "-"
        fields = (
            "input",
            "c%d" % region.get_region_number(),
            "%d-%d" % (feature.location.start, feature.location.end),
            strand_symbol,
            feature.get_accession(),
            feature.product,
        )
        headers.append("|".join(fields))
        translations.append(feature.translation)
    return headers, translations
def __init__(self, region_feature: secmet.Region) -> None:
    """ Initialises the instance from the given region, treating it as the query

        The parent initialiser receives, in order: the region number, the
        region number as a string, an identifier of the form
        "<record id>_<region number>", the fixed text "Query sequence" and
        a list of the region's CDS features, along with a rank of 0 and
        a cluster type of "query".

        Arguments:
            region_feature: the secmet.Region to build the instance from
    """
    number = region_feature.get_region_number()
    label = str(number)
    identifier = "%s_%d" % (region_feature.parent_record.id, number)
    # a fresh list, so the parent never holds the region's own container
    cds_features = list(region_feature.cds_children)
    super().__init__(number, label, identifier, "Query sequence", cds_features,
                     rank=0, cluster_type="query")
def __init__(self, results: NRPS_PKS_Results, region_feature: Region, record: RecordLayer) -> None:
    """ Initialises the layer for a single region, collecting the candidate
        cluster predictions that the results hold for that region

        Arguments:
            results: the NRPS_PKS_Results to take predictions from
            region_feature: the Region the layer is built for
            record: the RecordLayer providing the candidate clusters
    """
    # both lookups map gene name -> url and must exist before _build_urls()
    # runs, which itself happens before the parent initialiser
    self.url_strict: Dict[str, str] = {}
    self.url_relaxed: Dict[str, str] = {}
    self._build_urls(region_feature.cds_children)
    super().__init__(record, region_feature)

    assert isinstance(results, NRPS_PKS_Results), type(results)
    self.results = results

    number = region_feature.get_region_number()
    self.candidate_clusters: List[CandidateClusterLayer] = []
    # regions without any predictions simply end up with no candidate clusters
    predictions = results.region_predictions.get(number, [])
    for prediction in predictions:
        candidate = record.get_candidate_cluster(prediction.candidate_cluster_number)
        layer = CandidateClusterLayer(candidate, prediction)
        self.candidate_clusters.append(layer)
def __init__(self, region: secmet.Region, ranking: List[Tuple[ReferenceCluster, Score]],
             reference_proteins: Dict[str, Protein], prefix: str) -> None:
    """ Builds the query cluster and the hit clusters to display for a region

        Reference clusters are taken from the ranking in the order given.
        Any reference whose accession (the text before the first "_") matches
        the first part of the query record's id (the text before the first
        ".") is skipped.

        Arguments:
            region: the secmet.Region that was used as the query
            ranking: a list of tuples of reference cluster and its score,
                     processed in the order given
            reference_proteins: a dictionary of reference proteins, handed on
                                to Cluster.from_reference_cluster(); must not
                                be empty if the ranking is not empty
            prefix: the prefix to store and to hand on to each hit cluster
    """
    if ranking:
        assert reference_proteins
    self.prefix = prefix
    self.query_cluster = QueryRegion(region)
    region_number = region.get_region_number()
    cluster_limit = get_config().cb_nclusters
    self.colour_lookup = build_colour_groups(list(region.cds_children), ranking[:cluster_limit])
    self.hits = []  # type: List[Cluster]
    record_prefix = region.parent_record.id.split(".", 1)[0]
    for cluster, score in ranking:
        # presumably this prevents the query record from matching itself
        if record_prefix == cluster.accession.split("_", 1)[0]:
            continue
        # determine overall strand direction of hits
        strand = determine_strand_of_cluster(region, score.scored_pairings)
        # only the number of distinct reference genes hit is needed
        hit_genes = {subject.name for _, subject in score.scored_pairings}
        svg_cluster = Cluster.from_reference_cluster(cluster, region_number, score,
                                                     reference_proteins,
                                                     len(self.hits) + 1, len(hit_genes),
                                                     strand, self.prefix)
        self.hits.append(svg_cluster)
        # obey the cluster display limit from options
        # NOTE: the check follows the append, so a limit below one still
        #       results in a single hit being kept
        if len(self.hits) >= cluster_limit:
            break
    self.max_length = self._size_of_largest_cluster()
    self._organise_strands()
def build_anchor_id(region: Region) -> str:
    """ Builds a consistent HTML anchor identifier for a Region, in the form
        "r<record index>c<region number>"

        Arguments:
            region: the Region to build the identifier for

        Returns:
            the identifier as a string
    """
    record_index = region.parent_record.record_index
    region_number = region.get_region_number()
    return f"r{record_index}c{region_number}"
def generate_javascript_data(_record: Record, region: Region,
                             results: ClusterCompareResults) -> Dict[str, Any]:
    """ Generates JSON data for the javascript to draw relevant results in HTML output

        For each database label and each variant with at least one score,
        up to DISPLAY_LIMIT reference clusters are included, highest score
        first. Each reference entry holds its start and end, the links between
        query and reference genes (with the midpoint of each gene), whether
        the reference should be drawn reversed, and the reference's genes.
        A reference is marked as reversed when more than half of its links
        join genes on differing strands.

        Arguments:
            _record: the relevant Record for the results (unused)
            region: the specific Region to generate data for
            results: the ClusterCompareResults that need data extracted

        Returns:
            a JSON-friendly dictionary with the relevant data
    """
    def midpoint(location: Any) -> Any:
        """ Returns the position halfway between a location's start and end,
            using floor division
        """
        return location.start + (location.end - location.start) // 2

    output: Dict[str, Any] = {}
    for label, db_results in results.by_database.items():
        by_variant: Dict[str, Any] = {}
        output[label] = by_variant
        region_number = region.get_region_number()
        for variant, result in sorted(db_results.by_region.get(region_number, {}).items()):
            ranked = sorted(result.scores_by_region, key=lambda pair: pair[1], reverse=True)
            best = ranked[:DISPLAY_LIMIT]
            # variants without any scores are left out of the output entirely
            if not best:
                continue

            variant_data: Dict[str, Dict[str, Any]] = {
                "reference_clusters": {}
            }
            by_variant[variant] = variant_data

            for reference, _ in best:
                links = []  # filled in while walking the hits below
                ref_entry: Dict[str, Any] = {
                    "start": reference.start,
                    "end": reference.end,
                    "links": links,
                    "reverse": False,  # potentially changed later
                }
                genes = {}
                for ref_gene in reference.cdses.values():
                    gene_json = ref_gene.get_minimal_json()
                    gene_json["linked"] = {}
                    genes[ref_gene.name] = gene_json
                variant_data["reference_clusters"][reference.get_identifier()] = ref_entry

                mismatches = 0
                hits = result.hits_by_region.get(reference, {})
                for ref_cds_id, hit in hits.items():
                    assert locations.locations_overlap(hit.cds.location, region.location)
                    query_cds = hit.cds
                    query_point = midpoint(query_cds.location)
                    ref_cds = reference.cdses[ref_cds_id]
                    subject_point = midpoint(ref_cds.location)
                    if query_cds.location.strand != ref_cds.location.strand:
                        mismatches += 1
                    query_name = query_cds.get_name()
                    genes[ref_cds.name]["linked"][region_number] = query_name
                    links.append({
                        "query": query_name,
                        "subject": ref_cds.name,
                        "query_loc": query_point,
                        "subject_loc": subject_point,
                    })
                # draw the reference flipped if most links change strand
                ref_entry["reverse"] = mismatches > len(links) / 2
                ref_entry["genes"] = list(genes.values())
    return output