Example #1
0
def update_column(dataset, column, featuretype):
    """
    Updates a column from a dataset.

    Parameters
    ----------
    dataset : str
        The dataset name.
    column : str
        The column name.
    featuretype : str
        The feature type (Numerical, Categorical, or DateTime).

    Returns
    -------
    dict
        The column info.

    Raises
    ------
    NotFound
        When the dataset or column does not exist.

    BadRequest
        When the featuretype is invalid.
    """
    try:
        metadata = stat_dataset(dataset)

        # without both keys the column cannot be located
        has_required_keys = "columns" in metadata and "featuretypes" in metadata
        if not has_required_keys:
            raise COLUMN_NOT_FOUND

        dataset_columns = metadata["columns"]

        if column not in dataset_columns:
            raise COLUMN_NOT_FOUND

        # overwrite the feature type at the column's position
        position = dataset_columns.index(column)
        metadata["featuretypes"][position] = featuretype

        validate_featuretypes(metadata["featuretypes"])

        df = load_dataset(dataset)

        # uses PlatIAgro SDK to persist the dataset
        save_dataset(dataset, df, metadata=metadata)
    except FileNotFoundError:
        raise DATASET_NOT_FOUND
    except ValueError as e:
        raise BadRequest("ValueError", str(e))

    return {"name": column, "featuretype": featuretype}
Example #2
0
def patch_dataset(name, file_object):
    """
    Update the dataset metadata in our object storage.

    Parameters
    ----------
    name : str
        The dataset name to look for in our object storage.

    file_object : dict
        File object.

    Returns
    -------
    dict
        The dataset details: name, columns, and filename.

    Raises
    ------
    BadRequest
        When incoming files are missing or invalid.

    NotFound
        When the dataset does not exist.
    """
    # the featuretypes part is mandatory for this endpoint
    if not file_object.file:
        raise BadRequest("NoFeatureTypes", "No featuretypes part")

    try:
        metadata = stat_dataset(name)
    except FileNotFoundError:
        raise NOT_FOUND

    try:
        # one feature type per line, uploaded as raw bytes
        stream = file_object.file
        featuretypes = [line.strip().decode("utf8") for line in stream.readlines()]
        validate_featuretypes(featuretypes)
    except ValueError as e:
        raise BadRequest("ValueError", str(e))

    # every column needs exactly one feature type
    if len(metadata["columns"]) != len(featuretypes):
        raise BadRequest(
            "DifferentLengths",
            "featuretypes must be the same length as the DataFrame columns"
        )

    # uses PlatIAgro SDK to update the dataset metadata
    metadata["featuretypes"] = featuretypes
    update_dataset_metadata(name=name, metadata=metadata)
    return get_dataset(name)
Example #3
0
    def test_validate_featuretypes(self):
        """An empty list and each known type pass; unknown types raise."""
        # empty input is accepted
        validate_featuretypes([])

        # any entry outside the known feature types must raise
        with self.assertRaises(ValueError):
            validate_featuretypes([DATETIME, CATEGORICAL, NUMERICAL, "int"])

        # each known feature type is valid on its own
        for valid_type in (DATETIME, CATEGORICAL, NUMERICAL):
            validate_featuretypes([valid_type])
Example #4
0
def update_column(dataset: str, column: str,
                  featuretype: str) -> Dict[str, str]:
    """Updates a column from a dataset.

    Args:
        dataset (str): the dataset name.
        column (str): the column name.
        featuretype (str): the feature type (Numerical, Categorical, or DateTime).

    Returns:
        The column info.

    Raises:
        NotFound: when the dataset or column does not exist.
        BadRequest: when the featuretype is invalid.
    """
    try:
        metadata = stat_dataset(dataset)

        # both keys are required to locate and retype the column
        missing_keys = "columns" not in metadata or "featuretypes" not in metadata
        if missing_keys:
            raise NotFound("The specified column does not exist")

        dataset_columns = metadata["columns"]

        if column not in dataset_columns:
            raise NotFound("The specified column does not exist")

        # overwrite the feature type at the column's position
        position = dataset_columns.index(column)
        metadata["featuretypes"][position] = featuretype

        validate_featuretypes(metadata["featuretypes"])

        df = load_dataset(dataset)

        # uses PlatIAgro SDK to persist the dataset
        save_dataset(dataset, df, metadata=metadata)
    except FileNotFoundError:
        raise NotFound("The specified dataset does not exist")
    except ValueError as e:
        raise BadRequest(str(e))

    return {"name": column, "featuretype": featuretype}
Example #5
0
def create_dataset(files: Dict[str, IO]) -> Dict[str, Any]:
    """Creates a new dataset in our object storage.

    Args:
        files (dict): file objects.

    Returns:
        The dataset details: name, columns, and filename.

    Raises:
        BadRequest: when incoming files are missing or invalid.
    """
    # checks if the post request has the file part
    if "file" not in files:
        raise BadRequest("No file part")
    file = files["file"]

    # if user does not select file, the browser also
    # submits an empty part without filename
    if file.filename == "":
        raise BadRequest("No selected file")

    # generate a dataset name from filename
    name = generate_name(file.filename)

    try:
        # reads file into a DataFrame
        df = read_into_dataframe(file, file.filename)
    except Exception:
        # deliberate best-effort fallback: if parsing fails,
        # store the raw file as-is instead of erroring out
        save_dataset(name, file)
        return {"name": name, "filename": file.filename}

    columns = df.columns.values.tolist()

    # checks if the post request has the 'featuretypes' part
    if "featuretypes" in files:
        try:
            # one feature type per line, uploaded as raw bytes
            ftype_file = files["featuretypes"]
            featuretypes = list(
                map(lambda s: s.strip().decode("utf8"),
                    ftype_file.readlines()))
            validate_featuretypes(featuretypes)
        except ValueError as e:
            raise BadRequest(str(e))

        # every column needs exactly one feature type
        if len(columns) != len(featuretypes):
            raise BadRequest(
                "featuretypes must be the same length as the DataFrame columns"
            )
    else:
        # no explicit types given: infer them from the data
        featuretypes = infer_featuretypes(df)

    metadata = {
        "featuretypes": featuretypes,
        "original-filename": file.filename,
    }

    # uses PlatIAgro SDK to save the dataset
    save_dataset(name, df, metadata=metadata)

    columns = [{
        "name": col,
        "featuretype": ftype
    } for col, ftype in zip(columns, featuretypes)]
    return {"name": name, "columns": columns, "filename": file.filename}