Example #1
def read_file(request_files):
    """Given a submitted_id string, a submitter_label string,
    and Django request.FILES object with one file,
    return a request object with a "filename" string and a "bytes" BytesIO."""
    if len(request_files) < 1:
        return failure("No files in request")
    if len(request_files) > 1:
        return failure("Multiple upload files not allowed")

    upload_file = list(request_files.values())[0]
    filename = upload_file.name
    if not filename.endswith(".xlsx"):
        return failure("Only .xlsx files are supported at this time.")
    content = BytesIO()
    try:
        for chunk in upload_file.chunks():
            content.write(chunk)
    except Exception as e:
        return failure("Invalid upload", {"exception": e})

    return success({
        "filename": filename,
        "content type": xlsx,
        "content": content
    })
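# A minimal usage sketch (hypothetical Django view; `failed` is the
# response predicate used in later examples):
#
#     def upload(request):
#         response = read_file(request.FILES)
#         if failed(response):
#             return response  # render the failure to the submitter
#         table = workbooks.read(response["content"])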
Example #2
def get_assay_header(column):
    """Given a column name that is an OBI or ONTIE ID
    (with an optional suffix for stddev, normalized, or qualitative),
    return the pair of a header dict and an error dict."""
    header = None
    assay_id = column.replace("obi_", "OBI:").replace("ontie_", "ONTIE:")
    if assay_id in config.labels and config.labels[assay_id] in config.assays:
        header = config.assays[config.labels[assay_id]].copy()
    elif (assay_id in config.labels
          and config.labels[assay_id] in config.parameters):
        header = config.parameters[config.labels[assay_id]].copy()
    if header:
        return header, None

    root_id = (assay_id.replace("_stddev", "")
               .replace("_normalized", "")
               .replace("_qualitative", ""))
    if root_id not in config.labels:
        return None, failure(
            f"Unrecognized assay '{root_id}' for column '{column}'")
    root_label = config.labels[root_id]

    if root_label in config.assays:
        header = config.assays[root_label].copy()
    elif root_label in config.parameters:
        header = config.parameters[root_label].copy()
    else:
        return None, failure(
            f"Unrecognized assay '{root_id}' for column '{column}'")

    if column.endswith("_stddev"):
        header["label"] = f"Standard deviation in {header['units']}"
        header["description"] = f"The standard deviation of the value in '{root_label}'"
        header.pop("example", None)
    elif column.endswith("_normalized"):
        header["label"] = f"{root_label} normalized value"
        header["type"] = "score 0-1"
        header["description"] = f"The normalized value for '{root_label}' from 0-1"
        header.pop("example", None)
    elif column.endswith("_qualitative"):
        header["label"] = f"{root_label} qualitative value"
        header["type"] = "text"
        header["terminology"] = "qualitative_measures"
        header["description"] = f"The qualitative value for '{root_label}'"
        header.pop("example", None)
    else:
        return None, failure(
            f"Unrecognized assay suffix for column '{column}'")

    return header, None
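# A sketch of how suffixed columns resolve (hypothetical OBI ID; assumes
# config.labels and config.assays know the root assay):
#
#     header, error = get_assay_header("obi_0000000_stddev")
#     if error is None:
#         header["label"]  # "Standard deviation in <units>"
#         "example" in header  # False: examples are dropped for suffixes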
Example #3
def get_value(scope, dataset, key=None):
    """Given a scope (staging or secret), a dataset ID, and an optional key,
    return the value or values in the 'data' key."""
    try:
        return success({"data": datasets.get_value(scope, dataset, key)})
    except Exception as e:
        return failure(e)
Example #4
def get_assay_headers(dataset_id):
    """Given dataset ID, return the assay headers."""
    dataset_path = get_staging_path(dataset_id)
    if dataset_id == "spr":
        path = "examples/spr-dataset.yml"
    elif not config.staging:
        raise Exception("CVDB_STAGING directory is not configured")
    else:
        path = os.path.join(dataset_path, "dataset.yml")

    if not os.path.isfile(path):
        raise Exception(f"File does not exist '{path}'")
    with open(path, "r") as f:
        dataset = yaml.load(f, Loader=yaml.SafeLoader)
    columns = dataset["Columns"]

    headers = []
    terminology_count = 0
    for column in columns:
        header = None
        if column in config.fields:
            header = config.fields[column].copy()
        elif column.startswith("obi_") or column.startswith("ontie_"):
            header, error = get_assay_header(column)
            if error:
                return error
        if not header:
            return failure(f"Unrecognized column '{column}'")
        if not isinstance(header, dict):
            return failure(f"Error processing column '{column}': {header}")
        header["value"] = column
        header["locked"] = True
        if "terminology" in header and header["terminology"] != "":
            terms = list(getattr(config, header["terminology"]))
            # 65 is "A": each terminology field gets its own column on
            # the "Terminology" sheet, with terms in rows 2..len(terms)+1
            col = chr(65 + terminology_count)
            end = len(terms) + 1
            formula = f"=Terminology!${col}$2:${col}${end}"
            header["terminology"] = terms
            header["validations"] = [{
                "type": "list",
                "formula1": formula,
                "allow_blank": True
            }]
            terminology_count += 1
        headers.append(header)

    return headers
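# The terminology validations point at a "Terminology" worksheet: the
# first terminology column uses spreadsheet column A, the next B, and so
# on, with terms filling rows 2 through len(terms) + 1. A standalone
# check of the formula construction (hypothetical terms):
terms = ["positive", "negative", "unknown"]
col = chr(65 + 0)  # first terminology column -> "A"
end = len(terms) + 1
assert f"=Terminology!${col}$2:${col}${end}" == "=Terminology!$A$2:$A$4"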
Example #5
def set_value(scope, dataset, key, value):
    """Given a scope (staging or secret), a dataset ID, a key string, and a simle value,
    that can be represented in YAML,
    add the key and value to the dataset metadata,
    maybe overwriting it."""
    try:
        datasets.set_value(scope, dataset, key, value)
        return success()
    except Exception as e:
        return failure(e)
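# Usage sketch for the get_value/set_value wrappers (hypothetical
# dataset ID and key):
#
#     set_value("staging", 1, "Dataset status", "submitted")
#     response = get_value("staging", 1, "Dataset status")
#     if not failed(response):
#         response["data"]  # "submitted"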
Example #6
def read_path(path, sheet=None):
    """Read a TSV or Excel from a path and return a response with a "table" key."""
    table = None
    filename, extension = os.path.splitext(path)
    extension = extension.lower()
    if extension == ".xlsx":
        table = workbooks.read(path, sheet)
    elif extension == ".tsv":
        table = tables.read_tsv(path)
    else:
        return failure(f"Unsupported input format for '{path}'")
    return success({"table": table})
Example #7
def get_secret_value(dataset_id, key=None):
    """Given a dataset ID and an optional key
    return the value or values from the dataset secret metadata."""
    if key in ["ds_id"]:
        return failure(f"Key '{key}' cannot be updated")
    path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    rows = tables.read_tsv(path)
    for row in rows:
        if row["ds_id"] == dataset_id:
            if key:
                return row[key]
            else:
                return row
    raise Exception(f"No row found for dataset '{dataset_id}'")
Example #8
def promote(name, email, dataset_id):
    """Given a submitter name, email, and dataset ID,
    promote the dataset from staging to public
    and commit to both repositories."""
    author = Actor(name, email)

    # staging
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    staging_dataset_path = os.path.join(config.staging.working_tree_dir,
                                        "datasets", str(dataset_id))
    paths = []
    try:
        set_staging_value(dataset_id, "Dataset status", "promoted")
        path = os.path.join(staging_dataset_path, "dataset.yml")
        paths.append(path)
    except Exception as e:
        return failure("Failed to update dataset status", {"exception": e})
    try:
        config.staging.index.add(paths)
        config.staging.index.commit(f"Promote dataset {dataset_id}",
                                    author=author,
                                    committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # public
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")
    public_dataset_path = os.path.join(config.public.working_tree_dir,
                                       "datasets", str(dataset_id))
    try:
        os.makedirs(public_dataset_path)
    except Exception as e:
        return failure(f"Could not create '{path}'", {"exception": e})
    try:
        paths = []
        for filename in ["dataset.yml", "assays.tsv"]:
            src = os.path.join(staging_dataset_path, filename)
            dst = os.path.join(public_dataset_path, filename)
            shutil.copyfile(src, dst)
            paths.append(dst)
    except Exception as e:
        return failure(f"Could not copy '{src}' to '{dst}'", {"exception": e})
    try:
        config.public.index.add(paths)
        config.public.index.commit(f"Promote dataset {dataset_id}",
                                   author=config.covic,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{public_dataset_path}'",
                       {"exception": e})

    print(f"Promoted dataset {dataset_id} from staging to public")
    return success({"dataset_id": dataset_id})
Example #9
def set_secret_value(dataset_id, key, value):
    """Given a dataset ID, key, and value,
    update the secret `datasets.tsv`."""
    if key in ["ds_id"]:
        return failure(f"Key '{key}' cannot be updated")
    path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    rows = tables.read_tsv(path)
    done = False
    for row in rows:
        if row["ds_id"] == dataset_id:
            row[key] = str(value)
            done = True
        elif key not in row:
            row[key] = None
    if done:
        tables.write_tsv(rows, path)
    else:
        raise Exception(f"No row found for dataset '{dataset_id}'")
Example #10
def read(source, sheet=None):
    """Read a source and return a response with a "table" key."""
    if tables.is_table(source):
        return success({"table": source})
    if responses.is_response(source):
        if "table" in source:
            return success({"table": source["table"]})
        else:
            return failure(f"Response does not have 'table': '{source}'")
    if isinstance(source, str) or hasattr(source, "read"):
        return read_path(source, sheet)
    if requests.is_request(source):
        response = requests.read_file(source)
        if failed(response):
            return response
        table = workbooks.read(response["content"], sheet)
        return success({"table": table})
    raise Exception(f"Unknown input '{source}'")
Example #11
def convert(source, destination):
    """Given a source and a destimation (format or path)
    convert the table to that format
    and return a response with a "content" key."""
    table = None
    grid = None

    if grids.is_grid(source):
        grid = source
    else:
        response = read(source)
        if failed(response):
            return response
        table = response["table"]

    output_format = destination.lower()
    if output_format not in ["tsv", "html"]:
        filename, extension = os.path.splitext(destination)
        output_format = extension.lower().lstrip(".")
    if output_format == "tsv":
        content = tables.table_to_tsv_string(table)
        return success({
            "table": table,
            "content type": responses.tsv,
            "content": content
        })
    elif output_format == "html":
        if not grid:
            grid = grids.table_to_grid(config.prefixes, config.fields, table)
        html = grids.grid_to_html(grid)
        content = templates.render_html("templates/grid.html", {"html": html})
        return success({
            "table": table,
            "grid": grid,
            "html": html,
            "content type": responses.html,
            "content": content,
        })
    else:
        return failure(f"Unsupported output format for '{destination}'")
Example #12
def validate(headers, table):
    """Given the headers and a (validated!) table,
    return a response with "grid" and maybe "errors"."""
    errors = []
    rows = []
    unique = defaultdict(set)
    blinded_antibodies = config.read_blinded_antibodies()
    ab_ids = [x["ab_id"] for x in blinded_antibodies
              ] + [x["id"] for x in config.ab_controls.values()]
    ab_labels = [x["ab_id"].replace(":", "-")
                 for x in blinded_antibodies] + list(config.ab_controls.keys())

    columns = set()
    for header in headers:
        try:
            columns.add(header["label"])
        except KeyError as e:
            raise Exception(f"Bad header {header}", e)

    new_table = []
    for i, row in enumerate(table):

        # Skip blank rows
        if all(str(value).strip() == "" for value in row.values()):
            continue
        new_table.append(row)

        extra_columns = set(row.keys()) - columns
        extra_columns.discard(None)
        if extra_columns:
            extra = ", ".join(extra_columns)
            errors.append(f"Extra columns not allowed: {extra}")

        missing_columns = columns - set(row.keys())
        if missing_columns:
            missing = ", ".join(missing_columns)
            errors.append(f"Missing columns: {missing}")

        newrow = []
        for header in headers:
            column = header["label"]
            error = None
            if column not in row:
                # Should be handled above
                continue
            value = str(row[column]).strip()
            if "field" in header and header["field"] == "ab_id":
                if value not in ab_ids:
                    error = (
                        f"'{value}' is not a valid COVIC antibody ID or control antibody ID "
                        + "in column 'Antibody ID'")
            elif "field" in header and header["field"] == "ab_label":
                if value.lower() in ["na", "n/a"]:
                    continue
                if value not in ab_labels:
                    error = (
                        f"'{value}' is not a valid COVIC antibody label or control antibody label "
                        + "in column 'Antibody label'")
            elif "required" in header and header["required"] and value == "":
                error = f"Missing required value in column '{column}'"
            elif "unique" in header and header["unique"] and value in unique[
                    column]:
                error = f"Duplicate value '{value}' is not allowed in column '{column}'"
            elif "terminology" in header and value != "" and value not in header[
                    "terminology"]:
                error = f"'{value}' is not a valid term in column '{column}'"
            elif "type" in header and value != "":
                error = validate_field(column, header["type"], value)
            if "unique" in header and header["unique"]:
                unique[column].add(value)

            cell = None
            if error:
                cell = grids.error_cell(value, error)
                errors.append("Error in row {0}: {1}".format(i + 2, error))
            else:
                cell = grids.value_cell(value)
            newrow.append(cell)

        rows.append(newrow)

    table = new_table
    grid = {"headers": [headers], "rows": rows}
    unique_errors = []
    for error in errors:
        if error not in unique_errors:
            unique_errors.append(error)
    errors = unique_errors
    error_count = len(errors)
    if error_count > 0:
        return failure(
            f"There were {error_count} errors",
            {
                "errors": errors,
                "table": table,
                "grid": grid
            },
        )
    return success({"table": table, "grid": grid})
Example #13
def submit(name, email, organization, table):
    """Given a new table of antibodies:
    1. validate it,
    2. assign IDs and append them to the secrets,
    3. append the blinded antibodies to the staging table,
    4. return a response with merged IDs."""
    response = validate(headers, table)  # module-level antibody headers
    if failed(response):
        return response
    table = response["table"]  # blank rows removed

    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    secret = []
    path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
    if os.path.isfile(path):
        secret = tables.read_tsv(path)

    blind = config.read_blinded_antibodies()

    if len(secret) != len(blind):
        return failure(f"Different number of antibody rows: {len(secret)} != {len(blind)}")

    current_id = "COVIC:0"
    if len(blind) > 0:
        current_id = blind[-1]["ab_id"]

    submission = []
    for row in table:
        current_id = names.increment_id(current_id)

        # secrets: write this to the secret repo
        secret_row = OrderedDict()
        secret_row["ab_id"] = current_id
        secret_row["ab_name"] = row["Antibody name"]
        secret_row["ab_details"] = row["Antibody details"]
        secret_row["ab_comment"] = row["Antibody comment"]
        secret_row["org_name"] = organization
        secret_row["submitter_email"] = email
        secret.append(secret_row)

        # blind: write this to staging/public repos
        blind_row = OrderedDict()
        blind_row["ab_id"] = current_id

        # submission: return this to the submitter
        submission_row = OrderedDict()
        submission_row["ab_id"] = current_id
        submission_row["ab_name"] = row["Antibody name"]

        # for each header after the first (the module-level antibody
        # headers), add cells to blind and submission
        for header in headers[1:]:
            column = header["value"]
            value = row[header["label"]]
            if column.endswith("_label"):
                i = config.ids.get(value, "")
                blind_row[column.replace("_label", "_id")] = i
                submission_row[column.replace("_label", "_id")] = i
                submission_row[column] = value
            else:
                blind_row[column] = value
                submission_row[column] = value

        blind.append(blind_row)
        submission.append(submission_row)

    author = Actor(name, email)

    # secret
    try:
        path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(secret, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # staging
    try:
        path = os.path.join(config.staging.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # public
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")
    try:
        path = os.path.join(config.public.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.public.index.add([path])
        config.public.index.commit("Submit antibodies", author=config.covic, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    grid = grids.table_to_grid(config.prefixes, config.fields, submission)
    print("Submitted antibodies")
    return success({"table": submission, "grid": grid})
Example #14
def submit(name, email, dataset_id, table):
    """Given a dataset ID and a new table of assays,
    validate it, save it to staging, and commit."""
    response = validate(dataset_id, table)
    if failed(response):
        return response
    table = response["table"]  # remove blank rows

    ab_ids = {}
    for ab in config.read_blinded_antibodies():
        ab_id = ab["ab_id"]
        ab_label = ab_id.replace(":", "-")
        ab_ids[ab_label] = ab_id
    for row in config.ab_controls.values():
        ab_ids[row["label"]] = row["id"]

    assay_headers = get_assay_headers(dataset_id)
    assays = []
    for row in table:
        assay = OrderedDict()
        for header in assay_headers:
            value = header["value"]
            label = header["label"]
            if value == "ab_label":
                row[label] = row[label].strip()
                assay["ab_id"] = ab_ids[row[label]]
            else:
                assay[value] = row[label]
        assays.append(assay)

    author = Actor(name, email)

    # staging
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    dataset_path = os.path.join(config.staging.working_tree_dir, "datasets",
                                str(dataset_id))
    paths = []
    try:
        set_staging_value(dataset_id, "Dataset status", "submitted")
        path = os.path.join(dataset_path, "dataset.yml")
        paths.append(path)
    except Exception as e:
        return failure("Failed to update dataset status", {"exception": e})
    try:
        path = os.path.join(dataset_path, "assays.tsv")
        tables.write_tsv(assays, path)
        paths.append(path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add(paths)
        config.staging.index.commit(
            f"Submit assays to dataset {dataset_id}",
            author=author,
            committer=config.covic,
        )
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    grid = grids.table_to_grid(config.prefixes, config.fields, table)
    print(f"Submitted assays to dataset {dataset_id}")
    return success({"table": table, "grid": grid, "dataset_id": dataset_id})
Example #15
def create(name, email, columns=None):
    """Given a submitter name, email, and a list of column names,
    create a new dataset:
    add it to the secret datasets.tsv,
    write its staging dataset.yml,
    and commit to both repositories."""
    columns = columns or []
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")

    for column in columns:
        if column in config.fields:
            continue
        if column.startswith("obi_") or column.startswith("ontie_"):
            assay_id = column.replace("obi_",
                                      "OBI:").replace("ontie_", "ONTIE:")
            root_id = (assay_id.replace("_stddev", "").replace(
                "_normalized", "").replace("_qualitative", ""))
            if assay_id in config.labels:
                continue
            if root_id in config.labels:
                if column.endswith("_stddev"):
                    continue
                if column.endswith("_normalized"):
                    continue
                if column.endswith("_qualitative"):
                    continue
        return failure(f"Unrecognized column '{column}'")

    datasets_path = os.path.join(config.staging.working_tree_dir, "datasets")
    current_id = 0
    if not os.path.exists(datasets_path):
        os.makedirs(datasets_path)
    if not os.path.isdir(datasets_path):
        return failure(f"'{datasets_path}' is not a directory")
    # find the highest existing numeric dataset ID
    # (use dirname to avoid shadowing the name parameter)
    for root, dirs, files in os.walk(datasets_path):
        for dirname in dirs:
            if re.fullmatch(r"\d+", dirname):
                current_id = max(current_id, int(dirname))
    dataset_id = current_id + 1

    author = Actor(name, email)

    # secret
    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    try:
        path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
        datasets = []
        if os.path.isfile(path):
            datasets = tables.read_tsv(path)
        datasets.append(
            OrderedDict({
                "ds_id": dataset_id,
                "submitter_email": email
            }))
        tables.write_tsv(datasets, path)
    except Exception as e:
        return failure(f"Failed to update '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit(f"Create dataset {dataset_id}",
                                   author=author,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # staging
    try:
        dataset_path = os.path.join(datasets_path, str(dataset_id))
        os.mkdir(dataset_path)
    except Exception as e:
        return failure(f"Failed to create '{dataset_path}'", {"exception": e})
    try:
        dataset = {
            "Dataset ID": f"ds:{dataset_id}",
            "Dataset status": "configured",
            "Columns": columns,
        }
        path = os.path.join(dataset_path, "dataset.yml")
        with open(path, "w") as outfile:
            yaml.dump(dataset, outfile, sort_keys=False)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit(f"Create dataset {dataset_id}",
                                    author=author,
                                    committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    print(f"Created dataset {dataset_id}")
    return success({"dataset_id": dataset_id})