Example No. 1
def get_analysis(user, token_info, paging_token, page_size):
    assert_user_has("search", token_info)
    default_token = {"page_size": page_size or 100, "offset": 0}
    token = parse_paging_token(paging_token) or default_token
    # If user has 'own-institution' clearance, pass an implicit filter to the query
    institution_filter = (
        token_info["institution"]
        if token_info["sofi-data-clearance"] == "own-institution"
        else False
    )
    items = get_analysis_page(
        token.get("query", {}),
        token["page_size"],
        token["offset"],
        authorized_columns(token_info),
        institution_filter,
    )
    count = get_analysis_count(token.get("query", {}))
    new_token = (
        None
        if len(items) < token["page_size"]
        else render_paging_token(
            token["page_size"],
            token.get("query", {}),
            token["offset"] + token["page_size"],
        )
    )
    response = {
        "items": items,
        "paging_token": new_token,
        "total_count": count,
        "approval_matrix": {},
    }
    audit_query(token_info, items)
    return jsonify(response)
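parse_paging_token and render_paging_token are not shown in this listing. A minimal sketch of the round-trip, assuming the token is a base64-encoded JSON blob carrying the query, page size and offset; only the call signatures come from the handler above, the encoding itself is an assumption:

import base64
import json


def render_paging_token(page_size, query, offset):
    # Assumed encoding: pack the paging state into a URL-safe base64 JSON blob.
    payload = {"page_size": page_size, "query": query, "offset": offset}
    return base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")


def parse_paging_token(paging_token):
    # An empty or missing token yields None so the caller falls back to defaults.
    if not paging_token:
        return None
    return json.loads(base64.urlsafe_b64decode(paging_token.encode("ascii")))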
Example No. 2
def bulk_metadata(user, token_info, path, metadata_tsv):
    assert_user_has("approve", token_info)
    metadata_list = validate_metadata_tsv(metadata_tsv)
    for m in metadata_list:
        assert_authorized_to_edit(token_info, m)
    errors = []
    sequence_names = []
    # A metadata row may reference several sequence files in its
    # sequence_filename field; collect them all for the existence check.
    for m in metadata_list:
        sequence_names.extend(m["sequence_filename"].split())

    trimmed_path = path.read().decode("utf-8").strip('"').strip()
    existing_sequences, missing_sequences = check_bulk_isolate_exists(
        trimmed_path, sequence_names
    )
    if existing_sequences:
        try:
            metadata = [UploadMetadataFields.from_dict(m) for m in metadata_list]
            upload_metadata_list(metadata)
        except Exception as e:
            errors.append(f"Error: {str(e)}")

    errors.extend(
        [f"Missing {filename} in directory" for filename in missing_sequences]
    )
    return upload_response_helper(errors)
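upload_response_helper is used by several handlers in these examples but is not part of this listing. A plausible sketch, assuming it simply wraps the collected error strings in a JSON response; the exact response shape is an assumption:

from flask import jsonify


def upload_response_helper(errors=None):
    # Assumed shape: success is reported only when no errors were collected.
    errors = errors or []
    return jsonify({"status": "OK" if not errors else "Error", "errors": errors})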
Example No. 3
def create_approval(user, token_info, body: ApprovalRequest):
    assert_user_has("approve", token_info)
    appr = Approval()
    appr.matrix = body.matrix
    appr.approver = user
    appr.timestamp = datetime.datetime.now()
    appr.status = "submitted"
    appr.id = str(uuid.uuid4())

    # Set approval dates on approved sequences before sending them for
    # approval, so the timestamps can be transferred to upstream metadata
    # services, if needed.
    analysis_timestamp_updates = {}
    for seq in body.matrix:
        fields = body.matrix[seq]
        # Use a fresh update dict per sequence so updates do not leak between rows.
        seq_update = {}

        # Find dates that were already approved, for the incremental approval case.
        existing_matrix = get_approval_matrix(seq)

        existing_matrix.update(fields)
        time_fields = find_approved_categories(body.matrix[seq])

        # When approving date_epi, automatically generate the timestamp
        if fields.get("date_epi", False):
            seq_update["date_epi"] = appr.timestamp
            time_fields.append("date_epi")
        for f in time_fields:
            seq_update[f] = appr.timestamp
        analysis_timestamp_updates[seq] = seq_update

    update_analysis(analysis_timestamp_updates)

    errors_tuple = handle_approvals(appr, token_info["institution"])
    errors = []
    analysis_timestamp_reverts = {}
    for error_seq_id, error in errors_tuple:
        time_fields = find_approved_categories(appr.matrix[error_seq_id])
        for f in time_fields:
            # Revert every approved timestamp field, not only the last one.
            analysis_timestamp_reverts.setdefault(error_seq_id, {})[f] = None
        del appr.matrix[error_seq_id]
        errors.append(error)

    # If any sequences errored out on the metadata service, revert their
    # date_analysis_sofi timestamp
    update_analysis(analysis_timestamp_reverts)

    # Insert approval after matrix has been manipulated
    res = insert_approval(token_info["email"], appr)

    return (
        jsonify({"success": appr.to_dict(), "error": errors})
        if res is not None
        else abort(400)
    )
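From the loop above, body.matrix maps a sequence identifier to a dict of field names and their approval flags, with date_epi handled specially. A hypothetical request body for illustration; the identifiers and field names are invented:

example_approval_request = {
    "matrix": {
        "SEQ-0001": {"date_epi": True, "serotype_final": True},
        "SEQ-0002": {"species_final": True},
    }
}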
Example No. 4
def single_upload(user, token_info, metadata, _files):
    assert_user_has("approve", token_info)
    base_metadata: UploadMetadataFields = UploadMetadataFields.from_dict(
        json.loads(metadata.read())
    )
    assert_authorized_to_edit(token_info, base_metadata.to_dict())
    try:
        files = request.files.getlist("files")
        upload_isolate(base_metadata, files, token_info["institution"])
        return upload_response_helper()
    except Exception as e:
        return upload_response_helper([e.args[0]])
Example No. 5
def extract_data_from_pi(user,
                         token_info,
                         identifier_type=None,
                         identifier=None):
    assert_user_has("gdpr.manage", token_info)
    document = personal_data_from_identifier(identifier_type, identifier)
    res = personal_data_to_text(document)
    mail = token_info["email"]
    # TODO: should we log the identifier here? would be PII itself
    app.logger.info(
        f"[GDPR Audit]: User -{mail}- extracted personally identifiable information via {identifier_type} {identifier}"
    )
    return PersonalData(data=res)
Example No. 6
def multi_upload(user, token_info, metadata_tsv, _files):
    assert_user_has("approve", token_info)
    # Files aren't properly routed from connexion, use these files instead:
    files = request.files.getlist("files")
    errors = []
    try:
        metadata_list = validate_metadata_tsv(metadata_tsv)
        metadata_map = {item["sequence_filename"]: item for item in metadata_list}
        filenames = [x.filename for x in files]

        for key, metadata in metadata_map.items():
            split_filenames = key.split()
            if not all(f in filenames for f in split_filenames):
                errors.append(f"Could not find the files listed for {key} among the uploaded files.")
                continue
            files_for_metadata = [f for f in files if f.filename in split_filenames]

            try:
                if metadata:
                    base_metadata = UploadMetadataFields.from_dict(metadata)
                    current_errors = validate_metadata(
                        base_metadata, files_for_metadata
                    )
                    if authorized_to_edit(token_info, base_metadata):
                        if not current_errors:
                            upload_isolate(
                                base_metadata,
                                files_for_metadata,
                                token_info["institution"],
                            )
                        else:
                            errors.extend(current_errors)
                    else:
                        errors.append(
                            f"You are not authorized to edit isolate -{base_metadata.isolate_id}-"
                        )

            except Exception as e:
                import traceback

                print(traceback.format_exc(), file=sys.stderr)
                errors.append(f"Error with files: {key}, {str(e)}")
                continue
    except Exception as e:
        # Surface TSV-level failures to the caller instead of only logging them.
        print(e, file=sys.stderr)
        errors.append(str(e))

    return upload_response_helper(errors)
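validate_metadata_tsv is shared by bulk_metadata and multi_upload but is not shown here. A minimal sketch, assuming it parses the uploaded TSV into one dict per row and only checks for the sequence_filename column; the real validation rules are an assumption:

import csv
import io


def validate_metadata_tsv(metadata_tsv):
    # Assumed behaviour: decode the uploaded file and turn each TSV row into a dict.
    text = metadata_tsv.read().decode("utf-8")
    rows = list(csv.DictReader(io.StringIO(text), delimiter="\t"))
    if not rows or "sequence_filename" not in rows[0]:
        raise ValueError("metadata TSV must contain a sequence_filename column")
    return rows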
Example No. 7
def get_sequence_by_id(user, token_info, sequence_id):
    assert_user_has("search", token_info)
    row = get_single_analysis(sequence_id)
    if row is None:
        abort(404)
    if (
        token_info["sofi-data-clearance"] == "own-institution"
        and token_info["institution"] != row["institution"]
    ):
        abort(404)
    allowed_cols = authorized_columns(token_info)
    for key, _ in list(row.items()):
        # Only return columns the user is allowed to see
        if key not in allowed_cols:
            del row[key]
    return jsonify(row)
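authorized_columns drives both the search handler and the per-row filtering above. One possible sketch, assuming the allowed columns are looked up from the user's data-clearance claim; the mapping below is entirely hypothetical:

# Hypothetical clearance-to-columns mapping, for illustration only.
COLUMNS_BY_CLEARANCE = {
    "all": ["sequence_id", "institution", "date_epi", "serotype_final"],
    "own-institution": ["sequence_id", "institution", "date_epi"],
}


def authorized_columns(token_info):
    # Unknown clearance levels fall back to an empty column list.
    return COLUMNS_BY_CLEARANCE.get(token_info.get("sofi-data-clearance"), [])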
Example No. 8
def submit_changes(
    user, token_info: Dict[str, str], body: Dict[str, Any]
) -> Dict[str, Dict[str, Any]]:
    assert_user_has("approve", token_info)
    updates = list(body.keys())
    allowed_cols = authorized_columns(token_info)
    for identifier in updates:
        row = get_single_analysis(identifier)
        # Make sure the user is allowed to modify this row
        assert_authorized_to_edit(token_info, row)
        for col in body[identifier].keys():
            # Make sure the user is allowed to modify that column
            if col not in allowed_cols:
                raise Forbidden(f"You are not authorized to edit column -{col}-")
    # TODO: Verify that none of these cells are already approved
    update_analysis(body)
    res = dict()
    for u in updates:
        res[u] = get_single_analysis(u)
    return jsonify(res)
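The body passed to submit_changes maps each sequence identifier to the columns being changed and their new values, and the handler returns the refreshed rows keyed by the same identifiers. A hypothetical payload for illustration; the identifier, column names and values are invented:

example_changes = {
    "SEQ-0001": {"date_sample": "2021-06-01", "serotype_final": "Typhimurium"},
}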
Example No. 9
def forget_pii(user, token_info, identifier_type, identifier):
    assert_user_has("gdpr.manage", token_info)
    res, ids = forget_user_data(identifier_type, identifier)
    audit_gdpr_forget(token_info, ids)
    return res
Example No. 10
def cancel_approval(user, token_info, approval_id: str):
    assert_user_has("approve", token_info)
    res = revoke_approval(token_info["email"], approval_id)
    return None if res.modified_count > 0 else abort(404)