async def get_external_participant_id_to_internal_sample_id_export(
    project: str,
    export_type: FileExtension,
    flip_columns: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """
    Get csv / tsv export of external_participant_id to internal_sample_id

    :param project: Only used here to build the download filename
    :param export_type: Supplies the delimiter, file extension and mime type
    :param flip_columns: Set to True when exporting for seqr
        (each row is reversed, so the sample ID comes first)
    :param connection: Connection whose ``project`` is passed to the
        participant layer
    :return: StreamingResponse whose body is the delimited file
    """
    player = ParticipantLayer(connection)
    # this wants project ID (connection.project)
    assert connection.project
    m = await player.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project)

    rows = [[pid, sample_id_format(sid)] for pid, sid in m]
    if flip_columns:
        rows = [r[::-1] for r in rows]

    output = io.StringIO()
    writer = csv.writer(output, delimiter=export_type.get_delimiter())
    writer.writerows(rows)

    ext = export_type.get_extension()
    filename = f'{project}-participant-to-sample-map-{date.today().isoformat()}{ext}'
    return StreamingResponse(
        # The whole file is already in memory, so hand it over as ONE chunk.
        # Iterating the string itself would yield a chunk per character.
        iter([output.getvalue()]),
        media_type=export_type.get_mime_type(),
        headers={'Content-Disposition': f'filename={filename}'},
    )
Exemplo n.º 2
0
async def get_sequence(
    sequence_id: int, connection: Connection = get_projectless_db_connection
):
    """Fetch one sequence by its sequence ID (sample ID returned formatted)"""
    layer = SampleSequenceLayer(connection)
    sequence = await layer.get_sequence_by_id(sequence_id, check_project_id=True)
    # swap the raw sample ID for its formatted representation before returning
    formatted_sample_id = sample_id_format(sequence.sample_id)  # type: ignore[arg-type]
    sequence.sample_id = formatted_sample_id
    return sequence
Exemplo n.º 3
0
async def get_all_sample_id_map_by_internal(
    connection: Connection = get_project_readonly_connection,
):
    """Map ALL sample IDs of the project, { [internal_id]: external_sample_id }"""
    sample_layer = SampleLayer(connection)
    assert connection.project
    raw_map = await sample_layer.get_all_sample_id_map_by_internal_ids(
        project=connection.project
    )

    # keys come back raw; format them for the response
    formatted_map = {}
    for internal_id, external_id in raw_map.items():
        formatted_map[sample_id_format(internal_id)] = external_id
    return formatted_map
Exemplo n.º 4
0
async def get_latest_sequence_ids_from_sample_ids(
    sample_ids: List[str], connection: Connection = get_projectless_db_connection
) -> Dict[str, int]:
    """Look up the latest sequence ID for each internal sample ID"""
    layer = SampleSequenceLayer(connection)
    raw_ids = sample_id_transform_to_raw_list(sample_ids)
    latest_by_raw_id = await layer.get_latest_sequence_ids_for_sample_ids(raw_ids)

    # re-key the result by formatted sample ID
    latest_by_sample: Dict[str, int] = {}
    for raw_id, sequence_id in latest_by_raw_id.items():
        latest_by_sample[sample_id_format(raw_id)] = sequence_id
    return latest_by_sample
Exemplo n.º 5
0
async def get_sample_id_map_by_external(
    external_ids: List[str],
    allow_missing: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """Build a map of sample IDs, { [externalId]: internal_sample_id }"""
    sample_layer = SampleLayer(connection)
    # a falsy allow_missing is passed on as False
    tolerate_missing = allow_missing or False
    raw_map = await sample_layer.get_sample_id_map_by_external_ids(
        external_ids, allow_missing=tolerate_missing
    )

    # values are raw internal IDs; format them for the response
    id_map = {}
    for external_id, internal_id in raw_map.items():
        id_map[external_id] = sample_id_format(internal_id)
    return id_map
Exemplo n.º 6
0
async def get_sequence_ids_from_sample_ids(
    sample_ids: List[str],
    connection: Connection = get_projectless_db_connection,
) -> Dict[str, Dict[SequenceType, int]]:
    """Fetch all sequence IDs for a list of internal Sample IDs"""
    raw_ids = sample_id_transform_to_raw_list(sample_ids)
    layer = SampleSequenceLayer(connection)
    by_raw_id = await layer.get_sequence_ids_from_sample_ids(raw_ids)

    # re-key by formatted sample ID; the per-sample value is passed through
    return dict(
        (sample_id_format(raw_id), per_type)
        for raw_id, per_type in by_raw_id.items()
    )
Exemplo n.º 7
0
async def get_sample_id_map_by_internal(
    internal_ids: List[str],
    connection: Connection = get_projectless_db_connection,
):
    """
    Build a map of sample IDs, { [internal_id]: external_sample_id }
    Without specifying a project, you might see duplicate external identifiers
    """
    sample_layer = SampleLayer(connection)
    raw_ids = sample_id_transform_to_raw_list(internal_ids)
    raw_map = await sample_layer.get_sample_id_map_by_internal_ids(raw_ids)

    # keys come back raw; format them for the response
    id_map = {}
    for raw_id, external_id in raw_map.items():
        id_map[sample_id_format(raw_id)] = external_id
    return id_map
async def get_external_participant_id_to_internal_sample_id(
    connection: Connection = get_project_readonly_connection,
):
    """
    List [external_participant_id, internal_sample_id] pairs for the project,
    useful for matching joint-called samples in the matrix table
    to the participant.

    The result is a list rather than a dictionary, because a dict could lose
    participants with multiple samples.
    """
    participant_layer = ParticipantLayer(connection)
    assert connection.project
    pairs = await participant_layer.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project
    )

    formatted_pairs = []
    for participant_id, raw_sample_id in pairs:
        formatted_pairs.append([participant_id, sample_id_format(raw_sample_id)])
    return formatted_pairs
Exemplo n.º 9
0
async def get_project_summary(
    request: Request,
    limit: int = 20,
    token: Optional[str] = None,
    connection: Connection = get_project_readonly_connection,
) -> ProjectSummaryResponse:
    """
    Get a paged summary of the project's participants and their samples

    :param request: Incoming request, used to build the ``self`` / ``next`` links
    :param limit: Passed to the web layer; a ``next`` link is only produced
        when at least this many samples were collected
    :param token: Token passed to the web layer (None on the first request)
    :param connection: Connection handed to the web layer
    :return: ProjectSummaryResponse with formatted sample IDs and paging links,
        or an empty response (``_links=None``) when there are no participants
    """
    st = WebLayer(connection)

    summary = await st.get_project_summary(token=token, limit=limit)
    participants = summary.participants

    if not participants:
        return ProjectSummaryResponse(
            participants=[],
            participant_keys=[],
            sample_keys=[],
            sequence_keys=[],
            _links=None,
            total_samples=0,
        )

    collected_samples = sum(len(p.samples) for p in participants)
    new_token = None
    if collected_samples >= limit:
        # Must run BEFORE the IDs are formatted below: the token is the
        # largest raw sample ID on this page. default=None covers the case
        # where no samples were collected at all (only possible if limit <= 0).
        new_token = max(
            (sample.id for p in participants for sample in p.samples),
            default=None,
        )

    for participant in participants:
        for sample in participant.samples:
            sample.id = sample_id_format(sample.id)

    next_link = None
    if new_token:
        # base_url ends with '/' and url.path starts with '/', so strip one of
        # them to avoid a '//' in the generated link.
        # NOTE(review): only `token` is carried over; other query params
        # (e.g. limit) are not part of the next link - confirm this is intended
        base = str(request.base_url).rstrip('/')
        next_link = f'{base}{request.url.path}?token={new_token}'

    links = PagingLinks(
        next=next_link,
        self=str(request.url),
        token=str(new_token) if new_token else None,
    )

    return ProjectSummaryResponse(
        participants=participants,
        total_samples=summary.total_samples,
        participant_keys=summary.participant_keys,
        sample_keys=summary.sample_keys,
        sequence_keys=summary.sequence_keys,
        _links=links,
    )
Exemplo n.º 10
0
async def get_sequences_by_internal_sample_ids(
    sample_ids: List[str],
    get_latest_sequence_only: bool = True,
    connection: Connection = get_projectless_db_connection,
):
    """Fetch the sequence objects belonging to a list of internal CPG sample IDs"""
    layer = SampleSequenceLayer(connection)
    raw_sample_ids: List[int] = sample_id_transform_to_raw_list(sample_ids)
    found = await layer.get_sequences_for_sample_ids(
        raw_sample_ids, get_latest_sequence_only=get_latest_sequence_only
    )

    # replace each raw sample ID with its formatted representation, in place
    for sequence in found:
        raw_id = int(sequence.sample_id)
        sequence.sample_id = sample_id_format(raw_id)

    return found
Exemplo n.º 11
0
async def create_new_sample(
    sample: NewSample,
    connection: Connection = get_project_write_connection,
) -> str:
    """Insert a new sample and return its (formatted) internal sample ID"""
    sample_layer = SampleLayer(connection)
    async with connection.connection.transaction():
        new_raw_id = await sample_layer.insert_sample(
            external_id=sample.external_id,
            sample_type=sample.type,
            active=True,
            meta=sample.meta,
            participant_id=sample.participant_id,
            # the project was already checked by get_project_write_connection
            check_project_id=False,
        )
        formatted_id = sample_id_format(new_raw_id)
        return formatted_id
Exemplo n.º 12
0
async def batch_upsert_samples(
    samples: SampleBatchUpsertBody,
    connection: Connection = get_project_write_connection,
) -> Dict[str, Any]:
    """Upsert a batch of samples (with sequences) and return the per-sample results"""

    # Incoming sample IDs are formatted strings; convert them to raw IDs in place
    for entry in samples.samples:
        if not entry.id:
            continue
        entry.id = sample_id_transform_to_raw(entry.id)

    async with connection.connection.transaction():
        sample_layer = SampleLayer(connection)
        upserted = await sample_layer.batch_upsert_samples(samples)

        # Replace each value with {'sample_id': <formatted id>, 'sequences': ...};
        # the keys stay as returned by the layer
        for raw_id in list(upserted.keys()):
            upserted[raw_id] = {
                'sample_id': sample_id_format(raw_id),
                'sequences': upserted[raw_id],
            }

        return upserted
async def batch_upsert_participants(
    participants: ParticipantUpsertBody,
    connection: Connection = get_project_write_connection,
) -> Dict[str, Any]:
    """
    Upsert a batch of participants together with their samples and sequences.
    Returns a dict keyed by external participant ID, each entry holding the
    participant's id, external_id and its samples keyed by formatted sample ID.
    """
    # Incoming sample IDs are formatted strings; convert them to raw IDs in place
    all_samples = (s for p in participants.participants for s in p.samples)
    for entry in all_samples:
        if entry.id:
            entry.id = sample_id_transform_to_raw(entry.id)

    external_pids = [p.external_id for p in participants.participants]

    async with connection.connection.transaction():
        participant_layer = ParticipantLayer(connection)
        upserted = await participant_layer.batch_upsert_participants(participants)

        # Pairs result keys with the input external IDs by position
        # (presumably the layer returns results in input order - verify)
        external_id_by_pid = dict(zip(upserted.keys(), external_pids))

        # Re-key samples by formatted sample ID, participants by external ID
        response: Dict[str, Dict[str, Any]] = {}
        for internal_pid, sample_map in upserted.items():
            formatted_samples: Dict[str, Any] = {
                sample_id_format(raw_sid): {'sequences': seqs}
                for raw_sid, seqs in sample_map.items()
            }
            external_id = external_id_by_pid[internal_pid]
            response[external_id] = {
                'id': internal_pid,
                'external_id': external_id,
                'samples': formatted_samples,
            }

        return response
Exemplo n.º 14
0
async def get_samples_by_criteria(
    sample_ids: List[str] = None,
    meta: Dict = None,
    participant_ids: List[int] = None,
    project_ids: List[str] = None,
    active: bool = Body(default=True),
    connection: Connection = get_projectless_db_connection,
):
    """
    Get list of samples (dict) matching a mixture of (AND'd) criteria
    """
    sample_layer = SampleLayer(connection)
    permissions = ProjectPermissionsTable(connection.connection)

    # Project names are only resolved to IDs when some were supplied
    resolved_project_ids: Optional[List[int]] = None
    if project_ids:
        resolved_project_ids = await permissions.get_project_ids_from_names_and_user(
            connection.author, project_ids, readonly=True
        )

    # Formatted sample IDs -> raw IDs (an empty / missing list is passed on as None)
    if sample_ids:
        raw_sample_ids = sample_id_transform_to_raw_list(sample_ids)
    else:
        raw_sample_ids = None

    matches = await sample_layer.get_samples_by(
        sample_ids=raw_sample_ids,
        meta=meta,
        participant_ids=participant_ids,
        project_ids=resolved_project_ids,
        active=active,
        check_project_ids=True,
    )

    # format the IDs of the returned samples in place
    for found in matches:
        found.id = sample_id_format(found.id)

    return matches
Exemplo n.º 15
0
    async def merge_samples(
        self,
        id_keep: int = None,
        id_merge: int = None,
        author: str = None,
    ):
        """
        Merge two samples together

        The meta of both samples is combined and written onto ``id_keep``;
        rows in ``sample_sequencing`` and ``analysis_sample`` that reference
        ``id_merge`` are re-pointed to ``id_keep``; the ``id_merge`` sample row
        is then deleted. All four statements run inside one transaction.

        :param id_keep: ID of the sample that is kept
        :param id_merge: ID of the sample that is merged in and deleted
        :param author: Author written to the kept sample; falls back to
            ``self.author`` when not given
        :return: The kept sample, re-fetched after the merge, with ``project``
            and ``author`` set on it
        """
        sid_merge = sample_id_format(id_merge)
        (_, sample_keep), (_, sample_merge) = await asyncio.gather(
            self.get_single_by_id(id_keep),
            self.get_single_by_id(id_merge),
        )

        def list_merge(l1: Any, l2: Any) -> Any:
            """
            Combine two values: None yields the other value, equal values
            collapse to one, anything else is concatenated into a list
            """
            if l1 is None:
                return l2
            if l2 is None:
                return l1
            if l1 == l2:
                return l1
            if isinstance(l1, list) and isinstance(l2, list):
                return l1 + l2
            if isinstance(l1, list):
                return l1 + [l2]
            if isinstance(l2, list):
                return [l1] + l2
            return [l1, l2]

        def dict_merge(meta1, meta2):
            """Combine two meta dicts; keys present in both go through list_merge"""
            d = dict(meta1)
            for key, value in meta2.items():
                if key in meta1:
                    # list_merge treats None as "no value", so an explicit None
                    # in meta2 can no longer wipe out a real value from meta1
                    d[key] = list_merge(meta1[key], value)
                else:
                    d[key] = value
            return d

        # Work on a copy, and tolerate a sample whose meta is None
        meta_original = dict(sample_keep.meta or {})
        # list_merge (rather than plain assignment) handles merging into a
        # sample that has already been merged before
        meta_original['merged_from'] = list_merge(
            meta_original.get('merged_from'), sid_merge)
        meta: Dict[str, Any] = dict_merge(meta_original, sample_merge.meta or {})

        sample_values: Dict[str, Any] = {
            'id': id_keep,
            'author': author or self.author,
            'meta': to_db_json(meta),
        }
        id_values: Dict[str, Any] = {'id_keep': id_keep, 'id_merge': id_merge}

        _query = """
            UPDATE sample
            SET author = :author,
                meta = :meta
            WHERE id = :id
        """
        _query_seqs = """
            UPDATE sample_sequencing
            SET sample_id = :id_keep
            WHERE sample_id = :id_merge
        """
        _query_analyses = """
            UPDATE analysis_sample
            SET sample_id = :id_keep
            WHERE sample_id = :id_merge
        """
        _del_sample = """
            DELETE FROM sample
            WHERE id = :id_merge
        """

        async with self.connection.transaction():
            # Re-point the referencing rows before deleting the merged sample
            await self.connection.execute(_query, dict(sample_values))
            await self.connection.execute(_query_seqs, dict(id_values))
            await self.connection.execute(_query_analyses, dict(id_values))
            await self.connection.execute(_del_sample, {'id_merge': id_merge})

        project, new_sample = await self.get_single_by_id(id_keep)
        new_sample.project = project
        new_sample.author = author or self.author

        return new_sample