async def get_external_participant_id_to_internal_sample_id_export(
    project: str,
    export_type: FileExtension,
    flip_columns: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """
    Get csv / tsv export of external_participant_id to internal_sample_id

    :param flip_columns: Set to True when exporting for seqr
    """
    player = ParticipantLayer(connection)
    # this wants project ID (connection.project)
    assert connection.project
    m = await player.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project
    )

    rows = [[pid, sample_id_format(sid)] for pid, sid in m]
    if flip_columns:
        rows = [r[::-1] for r in rows]

    output = io.StringIO()
    writer = csv.writer(output, delimiter=export_type.get_delimiter())
    writer.writerows(rows)

    ext = export_type.get_extension()
    filename = f'{project}-participant-to-sample-map-{date.today().isoformat()}{ext}'
    return StreamingResponse(
        # BUG FIX: iter(<str>) yields one CHARACTER per chunk, which streams the
        # whole payload byte-by-byte; wrap in a list to emit it as a single chunk.
        iter([output.getvalue()]),
        media_type=export_type.get_mime_type(),
        # BUG FIX: the computed `filename` was never interpolated into the header
        headers={'Content-Disposition': f'filename={filename}'},
    )
async def get_sequence(
    sequence_id: int, connection: Connection = get_projectless_db_connection
):
    """Get sequence by sequence ID"""
    seqlayer = SampleSequenceLayer(connection)
    sequence = await seqlayer.get_sequence_by_id(sequence_id, check_project_id=True)
    # present the internal (int) sample id in its external CPG string form
    sequence.sample_id = sample_id_format(sequence.sample_id)  # type: ignore[arg-type]
    return sequence
async def get_all_sample_id_map_by_internal(
    connection: Connection = get_project_readonly_connection,
):
    """Get map of ALL sample IDs, { [internal_id]: external_sample_id }"""
    sample_layer = SampleLayer(connection)
    assert connection.project
    id_map = await sample_layer.get_all_sample_id_map_by_internal_ids(
        project=connection.project
    )
    # format the raw internal int keys as external CPG sample-id strings
    return {sample_id_format(internal): external for internal, external in id_map.items()}
async def get_latest_sequence_ids_from_sample_ids(
    sample_ids: List[str], connection: Connection = get_projectless_db_connection
) -> Dict[str, int]:
    """Get sequence ids from internal sample ids"""
    seqlayer = SampleSequenceLayer(connection)
    raw_ids = sample_id_transform_to_raw_list(sample_ids)
    id_map = await seqlayer.get_latest_sequence_ids_for_sample_ids(raw_ids)
    # keys come back as raw ints; re-format them as CPG sample-id strings
    return {sample_id_format(raw): seq_id for raw, seq_id in id_map.items()}
async def get_sample_id_map_by_external(
    external_ids: List[str],
    allow_missing: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """Get map of sample IDs, { [externalId]: internal_sample_id }"""
    st = SampleLayer(connection)
    # `allow_missing or False` was redundant — the parameter is already a
    # bool defaulting to False, so pass it through directly.
    result = await st.get_sample_id_map_by_external_ids(
        external_ids, allow_missing=allow_missing
    )
    return {k: sample_id_format(v) for k, v in result.items()}
async def get_sequence_ids_from_sample_ids(
    sample_ids: List[str],
    connection: Connection = get_projectless_db_connection,
) -> Dict[str, Dict[SequenceType, int]]:
    """Get all sequences by internal Sample IDs list"""
    seqlayer = SampleSequenceLayer(connection)
    raw_ids = sample_id_transform_to_raw_list(sample_ids)
    seq_map = await seqlayer.get_sequence_ids_from_sample_ids(raw_ids)
    # map raw int keys back to external CPG sample-id strings
    return {sample_id_format(raw): seqs for raw, seqs in seq_map.items()}
async def get_sample_id_map_by_internal(
    internal_ids: List[str],
    connection: Connection = get_projectless_db_connection,
):
    """
    Get map of sample IDs, { [internal_id]: external_sample_id }
    Without specifying a project, you might see duplicate external identifiers
    """
    sample_layer = SampleLayer(connection)
    raw_ids = sample_id_transform_to_raw_list(internal_ids)
    id_map = await sample_layer.get_sample_id_map_by_internal_ids(raw_ids)
    return {sample_id_format(raw): external for raw, external in id_map.items()}
async def get_external_participant_id_to_internal_sample_id(
    connection: Connection = get_project_readonly_connection,
):
    """
    Get a map of {external_participant_id} -> {internal_sample_id}
    useful to matching joint-called samples in the matrix table to the participant

    Return a list not dictionary, because dict could lose
    participants with multiple samples.
    """
    participant_layer = ParticipantLayer(connection)
    assert connection.project
    pairs = await participant_layer.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project
    )
    return [[external_pid, sample_id_format(internal_sid)] for external_pid, internal_sid in pairs]
async def get_project_summary(
    request: Request,
    limit: int = 20,
    token: Optional[str] = None,
    connection: Connection = get_project_readonly_connection,
) -> ProjectSummaryResponse:
    """
    Get a paginated summary of participants / samples / sequences for a project.

    :param limit: maximum number of samples collected into one page
    :param token: paging token — the highest sample ID of the previous page
    """
    # NOTE(fix): the previous docstring ("Creates a new sample, and returns the
    # internal sample ID") was a copy-paste error and described a different endpoint.
    st = WebLayer(connection)
    summary = await st.get_project_summary(token=token, limit=limit)

    if len(summary.participants) == 0:
        # empty project / past the last page: short-circuit with no paging links
        return ProjectSummaryResponse(
            participants=[],
            participant_keys=[],
            sample_keys=[],
            sequence_keys=[],
            _links=None,
            total_samples=0,
        )

    participants = summary.participants

    collected_samples = sum(len(p.samples) for p in participants)
    new_token = None
    if collected_samples >= limit:
        # next page starts after the largest sample id we returned
        new_token = max(sample.id for p in participants for sample in p.samples)

    # present internal int sample ids as external CPG sample-id strings
    for participant in participants:
        for sample in participant.samples:
            sample.id = sample_id_format(sample.id)

    links = PagingLinks(
        next=str(request.base_url) + request.url.path + f'?token={new_token}'
        if new_token
        else None,
        self=str(request.url),
        token=str(new_token) if new_token else None,
    )

    return ProjectSummaryResponse(
        participants=participants,
        total_samples=summary.total_samples,
        participant_keys=summary.participant_keys,
        sample_keys=summary.sample_keys,
        sequence_keys=summary.sequence_keys,
        _links=links,
    )
async def get_sequences_by_internal_sample_ids(
    sample_ids: List[str],
    get_latest_sequence_only: bool = True,
    connection: Connection = get_projectless_db_connection,
):
    """Get a list of sequence objects by their internal CPG sample IDs"""
    seqlayer = SampleSequenceLayer(connection)
    raw_ids: List[int] = sample_id_transform_to_raw_list(sample_ids)
    sequences = await seqlayer.get_sequences_for_sample_ids(
        raw_ids, get_latest_sequence_only=get_latest_sequence_only
    )

    # rewrite each sequence's raw int sample_id into its external string form
    for sequence in sequences:
        sequence.sample_id = sample_id_format(int(sequence.sample_id))

    return sequences
async def create_new_sample(
    sample: NewSample, connection: Connection = get_project_write_connection
) -> str:
    """Creates a new sample, and returns the internal sample ID"""
    sample_layer = SampleLayer(connection)
    async with connection.connection.transaction():
        internal_id = await sample_layer.insert_sample(
            external_id=sample.external_id,
            sample_type=sample.type,
            active=True,
            meta=sample.meta,
            participant_id=sample.participant_id,
            # already checked on get_project_write_connection
            check_project_id=False,
        )
        return sample_id_format(internal_id)
async def batch_upsert_samples(
    samples: SampleBatchUpsertBody,
    connection: Connection = get_project_write_connection,
) -> Dict[str, Any]:
    """Upserts a list of samples with sequences, and returns the list of internal sample IDs"""
    # Convert any external string IDs to raw internal ints before the DB layer
    for s in samples.samples:
        if s.id:
            s.id = sample_id_transform_to_raw(s.id)

    async with connection.connection.transaction():
        # Table interfaces
        sample_layer = SampleLayer(connection)
        results = await sample_layer.batch_upsert_samples(samples)

        # Re-wrap each value so the formatted CPG sample-id string travels with
        # its sequences (keys stay as the raw internal ids)
        for internal_id, sequences in results.items():
            results[internal_id] = {
                'sample_id': sample_id_format(internal_id),
                'sequences': sequences,
            }

        return results
async def batch_upsert_participants(
    participants: ParticipantUpsertBody,
    connection: Connection = get_project_write_connection,
) -> Dict[str, Any]:
    """
    Upserts a list of participants with samples and sequences
    Returns the list of internal sample IDs
    """
    # Convert any external string sample IDs to raw internal ints
    for participant in participants.participants:
        for s in participant.samples:
            if s.id:
                s.id = sample_id_transform_to_raw(s.id)

    external_pids = [p.external_id for p in participants.participants]

    async with connection.connection.transaction():
        # Table interfaces
        participant_layer = ParticipantLayer(connection)
        results = await participant_layer.batch_upsert_participants(participants)

        # Pair each returned internal participant id with its external id.
        # NOTE(review): this assumes the layer returns results in input order — verify.
        pid_key = dict(zip(results.keys(), external_pids))

        # Build the response keyed by external participant id, with sample ids
        # formatted back into their external CPG string form
        outputs: Dict[str, Dict[str, Any]] = {}
        for pid, samples in results.items():
            samples_output: Dict[str, Any] = {
                sample_id_format(iid): {'sequences': seqs}
                for iid, seqs in samples.items()
            }
            outputs[pid_key[pid]] = {
                'id': pid,
                'external_id': pid_key[pid],
                'samples': samples_output,
            }

        return outputs
async def get_samples_by_criteria(
    sample_ids: Optional[List[str]] = None,
    meta: Optional[Dict] = None,
    participant_ids: Optional[List[int]] = None,
    project_ids: Optional[List[str]] = None,
    active: bool = Body(default=True),
    connection: Connection = get_projectless_db_connection,
):
    """
    Get list of samples (dict) by some mixture of (AND'd) criteria

    All filter parameters are optional; omitted filters are not applied.
    """
    # FIX: annotations like `List[str] = None` are implicit-Optional, which
    # PEP 484 disallows — made the Optional explicit (runtime behavior unchanged).
    st = SampleLayer(connection)

    pt = ProjectPermissionsTable(connection.connection)
    pids: Optional[List[int]] = None
    if project_ids:
        # resolve project names to ids the author may read
        pids = await pt.get_project_ids_from_names_and_user(
            connection.author, project_ids, readonly=True
        )

    sample_ids_raw = (
        sample_id_transform_to_raw_list(sample_ids) if sample_ids else None
    )

    result = await st.get_samples_by(
        sample_ids=sample_ids_raw,
        meta=meta,
        participant_ids=participant_ids,
        project_ids=pids,
        active=active,
        check_project_ids=True,
    )

    # present internal int ids as external CPG sample-id strings
    for sample in result:
        sample.id = sample_id_format(sample.id)

    return result
async def merge_samples(
    self,
    id_keep: int = None,
    id_merge: int = None,
    author: str = None,
):
    """
    Merge two samples together: keep `id_keep`, fold `id_merge` into it.

    Meta fields are merged (conflicting values are combined into lists),
    sequences and analyses are re-pointed at the kept sample, the merged
    sample row is deleted, and a `merged_from` entry is recorded on the
    kept sample's meta. Returns the updated kept sample.
    """
    # FIX: the original ended with a bare `return` followed by a dead
    # `new_sample` expression statement, so the merged sample was never
    # returned — now returns `new_sample`.
    sid_merge = sample_id_format(id_merge)
    (_, sample_keep), (_, sample_merge) = await asyncio.gather(
        self.get_single_by_id(id_keep),
        self.get_single_by_id(id_merge),
    )

    def list_merge(l1: Any, l2: Any) -> List:
        # combine two meta values; equal/None pass through, otherwise flatten to a list
        if l1 is None:
            return l2
        if l2 is None:
            return l1
        if l1 == l2:
            return l1
        if isinstance(l1, list) and isinstance(l2, list):
            return l1 + l2
        if isinstance(l1, list):
            return l1 + [l2]
        if isinstance(l2, list):
            return [l1] + l2
        return [l1, l2]

    def dict_merge(meta1, meta2):
        # merge two meta dicts; keys present (non-None) in both get list_merge'd
        d = dict(meta1)
        d.update(meta2)
        for key, value in meta2.items():
            if key not in meta1 or meta1[key] is None or value is None:
                continue
            d[key] = list_merge(meta1[key], value)
        return d

    # this handles merging a sample that has already been merged
    meta_original = sample_keep.meta
    meta_original['merged_from'] = list_merge(
        meta_original.get('merged_from'), sid_merge
    )
    meta: Dict[str, Any] = dict_merge(meta_original, sample_merge.meta)

    values: Dict[str, Any] = {
        'sample': {
            'id': id_keep,
            'author': author or self.author,
            'meta': to_db_json(meta),
        },
        'ids': {'id_keep': id_keep, 'id_merge': id_merge},
    }

    # NOTE: the f-prefixes on the constant queries below were removed — none
    # of them interpolated anything.
    _query = """
        UPDATE sample
        SET author = :author,
            meta = :meta
        WHERE id = :id
    """
    _query_seqs = """
        UPDATE sample_sequencing
        SET sample_id = :id_keep
        WHERE sample_id = :id_merge
    """
    _query_analyses = """
        UPDATE analysis_sample
        SET sample_id = :id_keep
        WHERE sample_id = :id_merge
    """
    _del_sample = """
        DELETE FROM sample
        WHERE id = :id_merge
    """

    async with self.connection.transaction():
        await self.connection.execute(_query, {**values['sample']})
        await self.connection.execute(_query_seqs, {**values['ids']})
        await self.connection.execute(_query_analyses, {**values['ids']})
        await self.connection.execute(_del_sample, {'id_merge': id_merge})

    project, new_sample = await self.get_single_by_id(id_keep)
    new_sample.project = project
    new_sample.author = author or self.author

    return new_sample