Пример #1
0
 def update_gene_columns(df, allele_name, gene_name):
     """Fill missing allele columns of ``df`` in place from the gene columns.

     For each of the prefixes ``'v'`` and ``'j'`` and for every row, when the
     value in column ``<prefix>_<allele_name>`` is reported as missing by
     ``NumpyHelper.is_nan_or_empty`` while the value in column
     ``<prefix>_<gene_name>`` is not, the gene value is copied into the
     allele column.

     Args:
         df: pandas DataFrame containing the ``v_``/``j_`` prefixed columns
             for both ``allele_name`` and ``gene_name``.
         allele_name: column-name suffix of the columns to fill.
         gene_name: column-name suffix of the columns to copy from.

     Returns:
         None. ``df`` is modified in place.
     """
     # The column names do not depend on the row, so build them only once.
     column_pairs = [(f"{gene}_{allele_name}", f"{gene}_{gene_name}")
                     for gene in ('v', 'j')]

     for index, row in df.iterrows():
         for allele_column, gene_column in column_pairs:
             if NumpyHelper.is_nan_or_empty(
                     row[allele_column]
             ) and not NumpyHelper.is_nan_or_empty(row[gene_column]):
                 # Write through a single .loc call: chained indexing
                 # (df[col][index] = ...) may assign to a temporary copy and
                 # leave df unchanged.
                 df.loc[index, allele_column] = row[gene_column]
Пример #2
0
 def get_record(self):
     """Export the sequence object as a list with one entry per field.

     The entries follow the key order of ``ReceptorSequence.FIELDS``. When
     the instance has an attribute with the field name, its value is passed
     through ``NumpyHelper.get_numpy_representation``; otherwise the
     attribute of the same name on the ``ReceptorSequence`` class is used
     unchanged.
     """
     record = []
     for name in ReceptorSequence.FIELDS.keys():
         if hasattr(self, name):
             value = NumpyHelper.get_numpy_representation(getattr(self, name))
         else:
             # Fall back to the class-level attribute for this field.
             value = getattr(ReceptorSequence, name)
         record.append(value)
     return record
Пример #3
0
 def get_counts(self):
     """Return the "counts" attribute as a NumPy array of ints.

     Entries that ``NumpyHelper.is_nan_or_empty`` reports as missing become
     ``None`` in the resulting array. When the attribute itself is ``None``,
     ``None`` is returned.
     """
     raw_counts = self.get_attribute("counts")
     if raw_counts is None:
         return None

     converted = []
     for value in raw_counts:
         if NumpyHelper.is_nan_or_empty(value):
             converted.append(None)
         else:
             converted.append(int(value))
     return np.array(converted)
Пример #4
0
    def _prepare_cell_lists(self):
        """Load the repertoire data and group it by the "cell_ids" field.

        Returns whatever ``NumpyHelper.group_structured_array_by`` produces
        for the loaded data and the "cell_ids" field. An AssertionError is
        raised when the loaded data has no "cell_ids" field.
        """
        repertoire_data = self.load_data()

        # Grouping is only possible when the cell_ids field is present.
        assert "cell_ids" in repertoire_data.dtype.names and repertoire_data["cell_ids"] is not None, (
            f"Repertoire: cannot return receptor objects in repertoire {self.identifier} since cell_ids are not specified. "
            f"Existing fields are: {str(repertoire_data.dtype.names)[1:-1]}"
        )

        return NumpyHelper.group_structured_array_by(repertoire_data,
                                                     "cell_ids")
Пример #5
0
 def process_custom_lists(custom_lists):
     """Convert a mapping of custom field lists into array-building parts.

     Args:
         custom_lists: mapping of field name to a list of values; may be
             ``None`` or empty.

     Returns:
         A ``(field_list, values, dtype)`` tuple: the field names, one list
         of ``NumpyHelper.get_numpy_representation`` results per field, and
         one ``(field, numpy dtype)`` pair per field. All three are empty
         lists when ``custom_lists`` is falsy.
     """
     if not custom_lists:
         return [], [], []

     field_list, values, dtype = [], [], []
     for field, elements in custom_lists.items():
         column = [NumpyHelper.get_numpy_representation(el) for el in elements]
         field_list.append(field)
         values.append(column)
         # The dtype is whatever NumPy infers for the converted column.
         dtype.append((field, np.array(column).dtype))
     return field_list, values, dtype
Пример #6
0
    def _make_sequence_object(self, row, load_implants: bool = False):
        """Create a ReceptorSequence from one row of a structured array.

        Fields that are absent from the row are passed as ``None``, except
        ``frame_type``, which falls back to "IN". ``count`` is also ``None``
        when ``NumpyHelper.is_nan_or_empty`` reports the stored value as
        missing.

        When ``load_implants`` is true, every truthy value in a field that
        is not listed in ``Repertoire.FIELDS`` is parsed with
        ``ast.literal_eval`` and used as keyword arguments for an
        ``ImplantAnnotation``; values that fail to parse or construct are
        skipped.
        """
        fields = row.dtype.names

        def field_or(name, default=None):
            # Value stored under `name`, or `default` if the row lacks it.
            return row[name] if name in fields else default

        implants = []
        if load_implants:
            for key in fields:
                if key in Repertoire.FIELDS:
                    continue
                value_dict = row[key]
                if not value_dict:
                    continue
                try:
                    implants.append(
                        ImplantAnnotation(**ast.literal_eval(value_dict)))
                except (SyntaxError, ValueError, TypeError):
                    # Best effort: this field does not describe an implant.
                    pass

        if "counts" in fields and not NumpyHelper.is_nan_or_empty(
                row['counts']):
            count = row["counts"]
        else:
            count = None

        custom_params = {
            key: field_or(key)
            for key in set(self.fields) - set(Repertoire.FIELDS)
        }

        metadata = SequenceMetadata(v_gene=field_or("v_genes"),
                                    j_gene=field_or("j_genes"),
                                    v_subgroup=field_or("v_subgroups"),
                                    j_subgroup=field_or("j_subgroups"),
                                    v_allele=field_or("v_alleles"),
                                    j_allele=field_or("j_alleles"),
                                    chain=field_or("chains"),
                                    count=count,
                                    region_type=field_or("region_types"),
                                    frame_type=field_or("frame_types", "IN"),
                                    cell_id=field_or("cell_ids"),
                                    custom_params=custom_params)

        return ReceptorSequence(
            amino_acid_sequence=field_or("sequence_aas"),
            nucleotide_sequence=field_or("sequences"),
            identifier=field_or("sequence_identifiers"),
            metadata=metadata,
            annotation=SequenceAnnotation(implants=implants))
Пример #7
0
    def build(cls,
              sequence_aas: list = None,
              sequences: list = None,
              v_genes: list = None,
              j_genes: list = None,
              v_subgroups: list = None,
              j_subgroups: list = None,
              v_alleles: list = None,
              j_alleles: list = None,
              chains: list = None,
              counts: list = None,
              region_types: list = None,
              frame_types: list = None,
              custom_lists: dict = None,
              sequence_identifiers: list = None,
              path: Path = None,
              metadata: dict = None,
              signals: dict = None,
              cell_ids: List[str] = None,
              filename_base: str = None):
        """Store the given sequence data on disk and return a Repertoire.

        The per-sequence lists are combined into one structured NumPy array
        saved to ``<path>/<filename_base>.npy``; the metadata (extended with
        the resulting ``field_list``) is written as YAML to
        ``<path>/<filename_base>_metadata.yaml``.

        Args:
            sequence_aas, sequences, v_genes, j_genes, v_subgroups,
            j_subgroups, v_alleles, j_alleles, chains, counts, region_types,
            frame_types, sequence_identifiers, cell_ids: per-sequence lists.
                A list that is ``None`` or contains only ``None`` is left out
                of the stored array; single ``None`` entries are stored as
                NaN.
            custom_lists: mapping of extra field name to per-sequence list.
            path: directory (``Path``) in which both files are written.
            metadata: repertoire-level metadata; its "field_list" entry is
                overwritten. The passed dict is modified in place.
            signals: mapping of signal name to per-sequence list; names that
                already occur in ``metadata["field_list"]`` are skipped.
            filename_base: base name of the output files; a newly generated
                identifier is used when omitted.

        Returns:
            ``Repertoire(data_filename, metadata_filename, identifier)``.
        """
        sequence_count = Repertoire.check_count(sequence_aas, sequences,
                                                custom_lists)

        # Fall back to running numbers when identifiers are missing or
        # incomplete.
        if sequence_identifiers is None or len(
                sequence_identifiers) == 0 or any(
                    identifier is None for identifier in sequence_identifiers):
            sequence_identifiers = np.arange(sequence_count).astype(str)

        identifier = uuid4().hex

        filename_base = filename_base if filename_base is not None else identifier

        data_filename = path / f"{filename_base}.npy"

        # Normalise metadata before it is consulted for the signal filtering
        # below, so that passing signals without metadata does not fail.
        metadata = {} if metadata is None else metadata

        field_list, values, dtype = Repertoire.process_custom_lists(
            custom_lists)

        if signals:
            known_fields = metadata.get("field_list") or []
            signals_filtered = {
                signal: signals[signal]
                for signal in signals if signal not in known_fields
            }
            field_list_signals, values_signals, dtype_signals = Repertoire.process_custom_lists(
                signals_filtered)

            field_list.extend(field_list_signals)
            values.extend(values_signals)
            dtype.extend(dtype_signals)

        # Explicit name -> argument mapping; replaces looking the arguments
        # up by name via eval(). Names in Repertoire.FIELDS that are not
        # listed here are skipped.
        standard_fields = {
            "sequence_aas": sequence_aas,
            "sequences": sequences,
            "v_genes": v_genes,
            "j_genes": j_genes,
            "v_subgroups": v_subgroups,
            "j_subgroups": j_subgroups,
            "v_alleles": v_alleles,
            "j_alleles": j_alleles,
            "chains": chains,
            "counts": counts,
            "region_types": region_types,
            "frame_types": frame_types,
            "sequence_identifiers": sequence_identifiers,
            "cell_ids": cell_ids,
        }

        for field in Repertoire.FIELDS:
            field_values = standard_fields.get(field)
            if field_values is not None and not all(el is None
                                                    for el in field_values):
                column = [
                    NumpyHelper.get_numpy_representation(val)
                    if val is not None else np.nan for val in field_values
                ]
                field_list.append(field)
                values.append(column)
                dtype.append((field, np.array(column).dtype))

        # Transpose the per-field columns into one tuple per sequence.
        repertoire_matrix = np.array(list(map(tuple, zip(*values))),
                                     order='F',
                                     dtype=dtype)
        np.save(str(data_filename), repertoire_matrix, allow_pickle=False)

        metadata_filename = path / f"{filename_base}_metadata.yaml"
        metadata["field_list"] = field_list
        with metadata_filename.open("w") as file:
            yaml.dump(metadata, file)

        repertoire = Repertoire(data_filename, metadata_filename, identifier)
        return repertoire
Пример #8
0
    def get_record(self):
        """Return the receptor as one flat list of field values.

        The list is the record of the first chain, followed by the record of
        the second chain, followed by the
        ``NumpyHelper.get_numpy_representation`` of every attribute named in
        ``self.FIELDS`` that is not one of the chain names.
        """
        chains = self.get_chains()
        first_chain_record = self.get_chain(chains[0]).get_record()
        second_chain_record = self.get_chain(chains[1]).get_record()

        own_values = []
        for name in self.FIELDS:
            if name in chains:
                # Chain attributes are already covered by the chain records.
                continue
            own_values.append(
                NumpyHelper.get_numpy_representation(getattr(self, name)))

        return first_chain_record + second_chain_record + own_values