def _build_new_sequence(self, sequence: ReceptorSequence, position, signal: dict) -> ReceptorSequence:
    """
    Build a new ReceptorSequence in which the signal's motif instance is implanted at `position`.

    The motif instance text is either a single string or two parts separated by one "/".
    The left part is written starting at `position`; it is followed by `gap` characters
    copied from the original sequence, then by the right part (empty when there is no "/").
    Everything before `position` and after the right part is copied from the original.

    Args:
        sequence: source sequence; its text is read via get_sequence() and its metadata is deep-copied
        position: index in the original text at which the left motif part starts
        signal: dict with the keys "motif_instance" (object exposing `.instance` and `.gap`),
            "signal_id" and "motif_id"

    Returns:
        a new ReceptorSequence carrying the modified text, a copy of the metadata and
        one ImplantAnnotation describing the implant
    """
    motif_instance = signal["motif_instance"]
    gap_length = motif_instance.gap
    instance_text = motif_instance.instance

    # split() is kept (rather than partition) so that more than one "/" still fails on unpacking
    if "/" in instance_text:
        left_part, right_part = instance_text.split("/")
    else:
        left_part, right_part = instance_text, ""

    gap_start = position + len(left_part)
    gap_end = gap_start + gap_length

    original_text = sequence.get_sequence()
    pieces = (
        original_text[:position],                   # untouched prefix
        left_part,
        original_text[gap_start:gap_end],           # gap keeps the original characters
        right_part,
        original_text[gap_end + len(right_part):],  # untouched suffix
    )
    implanted_text = "".join(pieces)

    annotation = SequenceAnnotation()
    annotation.add_implant(ImplantAnnotation(signal_id=signal["signal_id"],
                                             motif_id=signal["motif_id"],
                                             motif_instance=motif_instance,
                                             position=position))

    result = ReceptorSequence()
    result.set_annotation(annotation)
    result.set_metadata(copy.deepcopy(sequence.metadata))
    result.set_sequence(implanted_text, EnvironmentSettings.get_sequence_type())
    return result
def _create_new_sequences(self, sequences, new_sequence_count, signal) -> List[ReceptorSequence]:
    """
    Replace the last `new_sequence_count` entries of `sequences` with newly generated sequences.

    Each generated sequence consists only of an instance of a motif chosen at random from
    `signal.motifs`; it is annotated with an implant at position 0 and gets fixed metadata
    (v_gene "TRBV6-1", j_gene "TRBJ2-7", count 1, chain "B").

    Args:
        sequences: existing sequences; sliced, so the input object itself is not modified
        new_sequence_count: how many trailing sequences to replace; if it exceeds
            len(sequences), all existing sequences are replaced; if it is zero or
            negative, all existing sequences are kept and nothing is generated
        signal: object exposing `.id` and `.motifs` (each motif exposing `.identifier`
            and `.instantiate_motif()`)

    Returns:
        a list with the kept sequences followed by the generated ones
    """
    # Do not use sequences[:-new_sequence_count]: for a count of 0 that is sequences[:0],
    # which would silently drop every existing sequence.
    keep_count = max(len(sequences) - new_sequence_count, 0)
    new_sequences = sequences[:keep_count]

    for _ in range(new_sequence_count):
        motif = random.choice(signal.motifs)
        motif_instance = motif.instantiate_motif()

        annotation = SequenceAnnotation([
            ImplantAnnotation(signal_id=signal.id,
                              motif_id=motif.identifier,
                              motif_instance=motif_instance.instance,
                              position=0)
        ])
        metadata = SequenceMetadata(v_gene="TRBV6-1", j_gene="TRBJ2-7", count=1, chain="B")

        new_sequences.append(ReceptorSequence(amino_acid_sequence=motif_instance.instance,
                                              annotation=annotation,
                                              metadata=metadata))

    return new_sequences
def _make_sequence_object(self, row, load_implants: bool = False):
    """
    Build a ReceptorSequence from one structured row.

    Columns that are absent from the row are passed as None, except for the frame type,
    which falls back to "IN". The count is also set to None when
    NumpyHelper.is_nan_or_empty reports the stored value as NaN/empty.

    Args:
        row: record exposing `dtype.names` and item access by column name
        load_implants: when True, every column whose name is not in Repertoire.FIELDS is
            read as a possible serialized implant: non-empty values are parsed with
            ast.literal_eval and unpacked into ImplantAnnotation; values that raise
            SyntaxError, ValueError or TypeError in that step are skipped

    Returns:
        the assembled ReceptorSequence with metadata, custom parameters and implants
    """
    fields = row.dtype.names

    def column(name, default=None):
        # value stored under `name`, or `default` when the row has no such column
        return row[name] if name in fields else default

    implants = []
    if load_implants:
        extra_columns = [name for name in fields if name not in Repertoire.FIELDS]
        for name in extra_columns:
            raw_value = row[name]
            if not raw_value:
                continue
            try:
                implants.append(ImplantAnnotation(**ast.literal_eval(raw_value)))
            except (SyntaxError, ValueError, TypeError):
                # best effort: a value that cannot be turned into an implant is ignored
                continue

    amino_acids = column("sequence_aas")
    nucleotides = column("sequences")
    identifier = column("sequence_identifiers")

    if "counts" in fields and not NumpyHelper.is_nan_or_empty(row['counts']):
        count = row["counts"]
    else:
        count = None

    metadata = SequenceMetadata(
        v_gene=column("v_genes"),
        j_gene=column("j_genes"),
        v_subgroup=column("v_subgroups"),
        j_subgroup=column("j_subgroups"),
        v_allele=column("v_alleles"),
        j_allele=column("j_alleles"),
        chain=column("chains"),
        count=count,
        region_type=column("region_types"),
        frame_type=column("frame_types", "IN"),
        cell_id=column("cell_ids"),
        # custom parameters: every field of this object that is not a standard repertoire field
        custom_params={name: column(name)
                       for name in set(self.fields) - set(Repertoire.FIELDS)})

    return ReceptorSequence(amino_acid_sequence=amino_acids,
                            nucleotide_sequence=nucleotides,
                            identifier=identifier,
                            metadata=metadata,
                            annotation=SequenceAnnotation(implants=implants))
def create_from_record(cls, record: np.void):
    """
    Rebuild a ReceptorSequence from a stored record.

    The record is accepted only if it has a 'version' field equal to `cls.version`.
    Every field that ReceptorSequence.FIELDS declares as `str` (other than 'version')
    is copied from the record as-is; 'metadata' and 'annotation' are decoded from JSON
    and unpacked into SequenceMetadata and SequenceAnnotation respectively.

    Args:
        record: record exposing `dtype.names` and item access by field name

    Returns:
        the reconstructed ReceptorSequence

    Raises:
        NotImplementedError: if the record has no 'version' field or its version differs
            from `cls.version`
    """
    version_matches = 'version' in record.dtype.names and record['version'] == cls.version
    if not version_matches:
        raise NotImplementedError

    # plain string fields are taken over unchanged
    kwargs = {
        name: record[name]
        for name, value_type in ReceptorSequence.FIELDS.items()
        if value_type == str and name != 'version'
    }
    # nested objects are stored as JSON text
    kwargs['metadata'] = SequenceMetadata(**json.loads(record['metadata']))
    kwargs['annotation'] = SequenceAnnotation(**json.loads(record['annotation']))

    return ReceptorSequence(**kwargs)
def _make_sequence_object(self, row):
    """
    Build a ReceptorSequence from one structured row.

    Columns that are absent from the row are passed as None, except for the frame type,
    which falls back to "IN". Every column whose name contains "signal" and holds a
    non-empty value is parsed with ast.literal_eval and unpacked into an ImplantAnnotation;
    parsing errors are not caught here and propagate to the caller.

    Args:
        row: record exposing `dtype.names` and item access by column name

    Returns:
        the assembled ReceptorSequence with metadata, custom parameters and implants
    """
    fields = row.dtype.names

    def value_of(name, default=None):
        # value stored under `name`, or `default` when the row has no such column
        return row[name] if name in fields else default

    signal_columns = [name for name in fields if "signal" in name]
    stored_values = (row[name] for name in signal_columns)
    implants = [ImplantAnnotation(**ast.literal_eval(stored))
                for stored in stored_values if stored]

    amino_acids = value_of("sequence_aas")
    nucleotides = value_of("sequences")
    identifier = value_of("sequence_identifiers")

    metadata = SequenceMetadata(
        v_gene=value_of("v_genes"),
        j_gene=value_of("j_genes"),
        v_subgroup=value_of("v_subgroups"),
        j_subgroup=value_of("j_subgroups"),
        v_allele=value_of("v_alleles"),
        j_allele=value_of("j_alleles"),
        chain=value_of("chains"),
        count=value_of("counts"),
        region_type=value_of("region_types"),
        frame_type=value_of("frame_types", "IN"),
        cell_id=value_of("cell_ids"),
        # custom parameters: every field of this object that is not a standard repertoire field
        custom_params={name: value_of(name)
                       for name in set(self.fields) - set(Repertoire.FIELDS)})

    return ReceptorSequence(amino_acid_sequence=amino_acids,
                            nucleotide_sequence=nucleotides,
                            identifier=identifier,
                            metadata=metadata,
                            annotation=SequenceAnnotation(implants=implants))