Example no. 1
    def test_simplefilter():
        """
        :return:

        {'a': {'full': {1:[average, std,min,max], 2:.....}, 'step2': : {1:[average, std,min,max], 2:.....},...},
         'b': {.....}, 'c': {.....}, 'd': {} }


        """

        #download data
        folder = "/vagrant/code/pysdss/data/output/text/"
        experimentfolder = "/vagrant/code/pysdss/experiments/interpolation/"

        id = utils.create_uniqueid()

        # create a directory to store settings
        #if not os.path.exists(experimentfolder + str(id)):
        #   os.mkdir(experimentfolder + str(id))

        dowload_all_data_test(id, folder)

        df = pd.read_csv(folder + id + ".csv")
        # strip and lower column names
        df.columns = df.columns.str.strip()
        df.columns = df.columns.str.lower()

        #calculate statistics by row

        result = {}

        for n in ["a", "b", "c", "d"]:

            stats = filter.calculate_stat_byrow(df, rowname="row", colname=n)
            a = {"full": {}}
            for i in stats:
                a['full'].update(i)

            # for increasing steps

            for i in [2, 3, 4]:
                # filter by step, then recalculate the statistics by row
                keep, delete = filter.filter_bystep(df, step=i)

                stats = filter.calculate_stat_byrow(keep,
                                                    rowname="row",
                                                    colname=n)

                b = {"step" + str(i): {}}
                for j in stats:
                    b["step" + str(i)].update(j)

                a.update(b)

            result[n] = a

        return result
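A possible way to inspect the nested dictionary returned above is to flatten it into a table. The sketch below is not part of the original test: it assumes each per-row entry is the [average, std, min, max] list described in the docstring, and `stats_to_dataframe` is a hypothetical helper.

    import pandas as pd

    def stats_to_dataframe(result):
        # one record per (colorgrade, filter level, vineyard row)
        records = []
        for grade, levels in result.items():
            for level, rows in levels.items():
                for row_id, (avg, std, mn, mx) in rows.items():
                    records.append({"grade": grade, "level": level, "row": row_id,
                                    "average": avg, "std": std, "min": mn, "max": mx})
        return pd.DataFrame(records)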
Example no. 2
    def dowload_all_data_test(id, path):
        """
        download data for all 4 grades
        :param id:
        :param path:
        :return:
        """
        leftclm = ["row", "id_sdata", "lat", "lon"]
        rightclmn = [
            "a", "b", "c", "d", "keepa::int", "keepb::int", "keepc::int",
            "keepd::int"
        ]

        outfile = path + id + ".csv"
        # complete dataset
        dt.download_data("id_sdata",
                         "data.SensoDatum",
                         "id_sensor",
                         leftclm,
                         "id_sdata",
                         "data.colorgrade",
                         rightclmn,
                         datasetid,
                         outfile,
                         orderclms=["row", "id_sdata"],
                         conndict=connstring)
        name = utils.create_uniqueid()
        outfile = "/vagrant/code/pysdss/data/output/" + name + ".csv"
        # clipped dataset, result ordered
        dt.download_data("id_sdata",
                         "data.SensoDatum",
                         "id_sensor",
                         leftclm,
                         "id_sdata",
                         "data.colorgrade",
                         rightclmn,
                         datasetid,
                         outfile,
                         leftboundgeom="geom",
                         boundtable="data.boundary",
                         righttboundgeom="geom",
                         boundid="id_bound",
                         boundvalue=1,
                         orderclms=["row", "id_sdata"],
                         conndict=connstring)
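Note that `datasetid` and `connstring` are used inside `dowload_all_data_test` but never defined there, so they are presumably module-level globals. A placeholder sketch of what they might look like follows; the names come from the code above, but the values are assumptions, not taken from the original module.

    # hypothetical module-level globals assumed by dowload_all_data_test (placeholder values)
    datasetid = 1  # id of the dataset to download
    connstring = {"dbname": "grapes", "user": "vagrant", "password": "vagrant",
                  "host": "localhost", "port": "5432"}  # connection dictionary passed to dt.download_data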
Example no. 3
    def test_dense_berry_work():

        # 1 create unique id and set the output folder
        folder = "/vagrant/code/pysdss/data/output/text/"
        id = utils.create_uniqueid()

        if not os.path.exists(folder + id):
            os.mkdir(folder + id)
        outfolder = folder + id + "/"

        file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
        file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the directory

        # 5 set data properties: correct field names if necessary
        usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
                   "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]

        new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

        # 6 set data properties: automatically calculate the average distance between points after checking if
        # duplicate points exist (the user can also set an average distance from the web application)

        df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
        df.to_csv(file, index=False)
        del df

        # 7 calculate statistics along the rows
        id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                          grid=None, rowdirection="x",
                                          area_multiplier=2, filterzero=False, project="32611", rows_from_rawdata=True,
                                          nodata=-1, force_interpolation=False) #set force_interpolation=False to skip interpolation
        # 8 interpret statistics
        fileIO.save_object(outfolder + "/_stats", stats)
        # stats = fileIO.load_object( folder+str(id)+"/_stats")
        interpret_result(stats, outfolder)

    #test_dense_berry_work()
Example no. 4
    def post(self, request, format=None):

        try:

            up_file = request.FILES['file']
            if not up_file:
                #raise Exception("file is missing, upload again")
                return Response({
                    "success": False,
                    "content": "file is missing, upload again"
                })

            #check mandatory metadata fields   #todo: should this be done with the geoprocessing.json file?
            for i in settings.METADATA_MANDATORY_FIELDS:
                if not request.data.get(i):
                    #raise Exception("mandatory POST parameter " + i + " must be available")
                    return Response({
                        "success":
                        False,
                        "content":
                        "mandatory POST parameter " + i + " must be available"
                    })

            # create unique identifier to name the folder
            idf = utils.create_uniqueid()

            # check the extension; return an error if the format is not supported
            if up_file.name.split('.')[-1] not in settings.UPLOAD_FORMATS:
                return Response({
                    "success":
                    False,
                    "content":
                    "format should be one of " + str(settings.UPLOAD_FORMATS)
                })

            if not os.path.exists(settings.UPLOAD_ROOT + idf):
                os.mkdir(settings.UPLOAD_ROOT + idf)

            destination = open(settings.UPLOAD_ROOT + idf + "/" + up_file.name,
                               'wb')
            for chunk in up_file.chunks():
                destination.write(chunk)
            destination.close()

            # check .zip has a shapefile and unzip it if OK
            if up_file.name.split('.')[-1].lower() == "zip":
                zipShape = None
                z = None
                try:
                    z = open(settings.UPLOAD_ROOT + idf + "/" + up_file.name,
                             "rb")
                    zipShape = zipfile.ZipFile(z)
                    # extract file suffixes in a set
                    zfiles = {i.split('.')[-1] for i in zipShape.namelist()}
                    #and check the mandatory shapefile files are there
                    if not zfiles >= set(settings.SHAPE_MANDATORY_FILES):
                        return Response({
                            "success":
                            False,
                            "content":
                            "shapefiles files should be " +
                            str(settings.SHAPE_MANDATORY_FILES)
                        })
                    else:
                        for fileName in zipShape.namelist():  # unzip files
                            out = open(
                                settings.UPLOAD_ROOT + idf + "/" + fileName,
                                "wb")
                            out.write(zipShape.read(fileName))
                            out.close()
                except Exception as e:  #for any other unexpected error
                    return Response({
                        "success":
                        False,
                        "content":
                        "problems extracting the zip file, try to upload again"
                    })
                finally:
                    if zipShape: zipShape.close()
                    if z: z.close()

            # ...
            # do some stuff with the uploaded file, e.g.
            #   file = request.FILES['data']
            #   data = file.read()
            # ...

            #upload the metadata to the database table and get back the id for the new row

            # todo: this is to be converted to a celery process, but it is necessary to delete the in-memory file from the request data to avoid serialization errors

            iddataset = query.upload_metadata(request.data,
                                              settings.METADATA_ID_TYPES,
                                              settings.METADATA_FIELDS_TYPES,
                                              settings.METADATA_IDS)

            #return Response(up_file.name, status.HTTP_201_CREATED)

            #return the folderID, the metadata newrow ID, and the file extension
            return Response({
                "success":
                True,
                "content": [idf, iddataset,
                            up_file.name.split('.')[-1]]
            })
        except Exception as e:
            return Response({
                "success": False,
                "content": str(e)
            })  #errors coming from query.upload_metadata
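A minimal client-side sketch for exercising the upload view above; only the 'file' key and the response shape come from the code, while the endpoint URL and the metadata field are assumptions (the required metadata keys depend on settings.METADATA_MANDATORY_FIELDS).

    import requests

    with open("boundary.zip", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/api/upload/",   # hypothetical endpoint URL
            files={"file": f},
            data={"name": "test dataset"},         # must cover settings.METADATA_MANDATORY_FIELDS
        )
    print(resp.json())  # e.g. {"success": True, "content": [idf, iddataset, "zip"]}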
Example no. 5
    def test_berry_work2():
        # 1 create unique id and set the output folder
        folder = "/vagrant/code/pysdss/data/output/text/"
        id = utils.create_uniqueid()

        try:
            # 2 save the ID IN THE DATABASE
            dt.create_process(dt.default_connection, id, "claudio", dt.geotypes.interpolation.name)

            # 3-4 download the point data from the database, create a new folder  # TODO: create a function to read from the database
            if not os.path.exists(folder + id):
                os.mkdir(folder + id)
            outfolder = folder + id + "/"

            file = "/vagrant/code/pysdss/data/input/2016_06_17.csv"
            file = shutil.copy(file, outfolder + "/" + id + "_keep.csv")  # copy to the directory
            ##########

            # 5 set data properties: correct field names if necessary
            usecols = ["%Dataset_id", " Row", " Raw_fruit_count", " Visible_fruit_per_m", " Latitude", " Longitude",
                       "Harvestable_A", "Harvestable_B", "Harvestable_C", "Harvestable_D"]

            # these names are mandatory for the current implementation
            new_column_names = ["id", "row", "raw_fruit_count", "visible_fruit_per_m", "lat", "lon", "a", "b", "c", "d"]

            # 6 set data properties: automatically calculate the average distance between points after checking if
            # duplicate points exist (the user can also set an average distance from the web application)

            df = utils.remove_duplicates(file, [" Latitude", " Longitude"])
            df.to_csv(file, index=False)
            del df

            # avdist = utils.average_point_distance(file, " Longitude", " Latitude", " Row", direction="x",remove_duplicates=False)

            # 7 calculate statistics along the rows
            id, stats = berrycolor_workflow(id, file, usecols, new_column_names, outfolder, average_point_distance=None,
                                              grid=None, rowdirection="x",
                                              area_multiplier=2, filterzero=False, project="32611", rows_from_rawdata=True,
                                              nodata=-1, force_interpolation=True)
            # 8 interpret statistics
            fileIO.save_object(outfolder + "/_stats", stats)
            # stats = fileIO.load_object( folder+str(id)+"/_stats")
            interpret_result(stats, outfolder)

            # 9 save the operation state to the database

            js = json.dumps(
                {
                    "result": [
                        {
                            "type": "csvtable",
                            "name": "clorgrade_a_table",
                            "path": outfolder + "/_a_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_b_table",
                            "path": outfolder + "/_b_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_c_table",
                            "path": outfolder + "/_c_output.csv"
                        }, {
                            "type": "csvtable",
                            "name": "clorgrade_d_table",
                            "path": outfolder + "/_d_output.csv"
                        }
                    ],
                    "error": {}
                }
            )

            dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.completed.name, js)

            #### use geostatistics to get the rasterized image (indicator kriging?)

        except Exception as e:

            js = json.dumps(
                {
                    "result": [],
                    "error": {"message": str(e)}
                }
            )
            dt.update_process(dt.default_connection, id, "claudio", dt.geomessages.error.name, js)
Example no. 6
def check_file_upload(request):
    """
    Check the uploaded files are csv or zipped shapefiles
    :param request: the django request object with the request parameters
    :return: [True, {<newparameters>}] when no problem, otherwise [False,'<message>']
    """

    # create unique identifier to name the folder
    idf = utils.create_uniqueid()

    # check the extension; return an error if the format is not supported
    up_file = request.FILES['file']
    if up_file.name.split('.')[-1] not in settings.UPLOAD_FORMATS:
        #return Response({"success": False, "content": "format should be one of " + str(settings.UPLOAD_FORMATS)})
        return [
            False, "format should be one of " + str(settings.UPLOAD_FORMATS)
        ]

    if not os.path.exists(settings.UPLOAD_ROOT + idf):
        os.mkdir(settings.UPLOAD_ROOT + idf)

    destination = open(settings.UPLOAD_ROOT + idf + "/" + up_file.name, 'wb')
    for chunk in up_file.chunks():
        destination.write(chunk)
    destination.close()

    # check .zip has a shapefile and unzip it if OK
    if up_file.name.split('.')[-1].lower() == "zip":
        zipShape = None
        z = None
        try:
            z = open(settings.UPLOAD_ROOT + idf + "/" + up_file.name, "rb")
            zipShape = zipfile.ZipFile(z)
            # extract file suffixes in a set
            zfiles = {i.split('.')[-1] for i in zipShape.namelist()}
            # and check the mandatory shapefile files are there
            if not zfiles >= set(settings.SHAPE_MANDATORY_FILES):
                #return Response({"success": False, "content": "shapefiles files should be " + str(settings.SHAPE_MANDATORY_FILES)})
                return [
                    False, "shapefiles files should be " +
                    str(settings.SHAPE_MANDATORY_FILES)
                ]
            else:
                for fileName in zipShape.namelist():  # unzip files
                    out = open(settings.UPLOAD_ROOT + idf + "/" + fileName,
                               "wb")
                    out.write(zipShape.read(fileName))
                    out.close()
        except Exception as e:  # for any other unexpected error
            #return Response({"success": False, "content": "problems extracting the zip file, try to upload again"})
            return [
                False, "problems extracting the zip file, try to upload again"
            ]
        finally:
            if zipShape: zipShape.close()
            if z: z.close()

    # if there is no problem, return the folder ID and the file extension, plus the settings necessary to upload files to the database
    return [
        True, {
            'idf': idf,
            'fl_ext': up_file.name.split('.')[-1],
            'METADATA_ID_TYPES': settings.METADATA_ID_TYPES,
            'METADATA_FIELDS_TYPES': settings.METADATA_FIELDS_TYPES,
            'METADATA_IDS': settings.METADATA_IDS
        }
    ]
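A sketch, not from the original module, of how a DRF view might consume check_file_upload and map its [bool, payload] result onto the same response shape used in the post() example above; the view name and routing are assumptions.

    from rest_framework.response import Response
    from rest_framework.views import APIView

    class UploadCheckView(APIView):
        def post(self, request, format=None):
            ok, payload = check_file_upload(request)
            if not ok:
                # payload is the error message string
                return Response({"success": False, "content": payload})
            # payload carries the folder id, the file extension and the metadata settings
            return Response({"success": True, "content": [payload["idf"], payload["fl_ext"]]})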
Example no. 7
    def berrycolor_workflow_2_old(
            file, folder="/vagrant/code/pysdss/data/output/text/", gdal=True):
        """
        Test interpolation and statistics along the line (this was the old workflow_2 for sparse data, when there
        was also a workflow_3; now there is only one workflow_2) (see colordata/colordata.py)

        use gdal=False for scipy radial basis functions
        :return:
        """

        ############################ 1 download the filtered data with the chosen filter and create a dataframe
        # create a folder to store the output
        id = utils.create_uniqueid()
        if not os.path.exists(folder + id):
            os.mkdir(folder + id)
        folder = folder + id + "/"

        # set the path to the folder with the settings
        jsonpath = os.path.join(os.path.dirname(__file__), '..',
                                "pysdss/experiments/interpolation/")
        jsonpath = os.path.normpath(jsonpath)

        # 1 convert the points to a polyline

        utils2.csv_to_polyline_shapefile(file,
                                         ycol="lat",
                                         xcol="lon",
                                         linecol="row",
                                         epsg=32611,
                                         outpath=folder + "rows.shp")

        #############################  2 buffer the polyline

        utils2.buffer(folder + "rows.shp", folder + "rows_buffer.shp", 0.25)

        ##############################  3 rasterize the polyline

        #need a grid
        df = pd.read_csv(file)
        minx = math.floor(df['lon'].min())
        maxx = math.ceil(df['lon'].max())
        miny = math.floor(df['lat'].min())
        maxy = math.ceil(df['lat'].max())
        delty = maxy - miny  #height
        deltx = maxx - minx  #width

        area_multiplier = 2  # 2 to double the resolution and halve the pixel size to 0.5 meters

        path = jsonpath + "/rasterize.json"
        params = utils.json_io(path, direction="in")

        params["-a"] = "id_row"
        params["-te"] = str(minx) + " " + str(miny) + " " + str(
            maxx) + " " + str(maxy)
        params["-ts"] = str(deltx * 2) + " " + str(
            delty * 2)  # pixel 0.5 meters
        params["-ot"] = "Int16"

        params["src_datasource"] = folder + "rows_buffer.shp"
        params["dst_filename"] = folder + "rows_buffer.tiff"

        # build gdal_rasterize request
        text = grd.build_gdal_rasterize_string(params, outtext=False)
        print(text)
        text = ["gdal_rasterize"] + text
        print(text)

        # call gdal_rasterize
        print("rasterizing the rows")
        out, err = utils.run_tool(text)
        print("output" + out)
        if err:
            print("error:" + err)
            raise Exception("gdal rasterize failed")

        #################################   4 get the buffered polyline index

        d = gdal.Open(folder + "rows_buffer.tiff")
        row_index, row_indexed, row_properties = gdalIO.raster_dataset_to_indexed_numpy(
            d, id, maxbands=1, bandLocation="byrow", nodata=-1)

        print("saving indexed array to disk")
        fileIO.save_object(folder + "rows_buffer_index",
                           (row_index, row_indexed, row_properties))
        d = None

        ###################################   5 interpolate points, use the index to extract statistics along the line

        with open(jsonpath + "/vrtmodel.txt") as f:
            xml = f.read()

        # output the 4 virtual files for the 4 columns
        for clr in ["a", "b", "c", "d"]:
            data = {
                "layername": os.path.basename(file).split(".")[0],
                "fullname": file,
                "easting": "lon",
                "northing": "lat",
                "elevation": clr
            }
            utils2.make_vrt(xml, data, folder + "_" + clr + ".vrt")
        # output the 2 virtual files for raw fruit count and visible fruit count
        data = {
            "layername": os.path.basename(file).split(".")[0],
            "fullname": file,
            "easting": "lon",
            "northing": "lat",
            "elevation": "raw_fruit_count"
        }
        utils2.make_vrt(xml, data, folder + "_rawfruit.vrt")
        data = {
            "layername": os.path.basename(file).split(".")[0],
            "fullname": file,
            "easting": "lon",
            "northing": "lat",
            "elevation": "visible_fruit_per_m"
        }
        utils2.make_vrt(xml, data, folder + "_visiblefruit.vrt")

        # interpolate

        if gdal:
            path = jsonpath + "/invdist.json"
            params = utils.json_io(path, direction="in")
            params["-txe"] = str(minx) + " " + str(maxx)
            params["-tye"] = str(miny) + " " + str(maxy)
            params["-outsize"] = str(deltx * area_multiplier) + " " + str(
                delty * area_multiplier)
            params["-a"]["radius1"] = "10"
            params["-a"]["radius2"] = "10"
            params["-a"]["smoothing"] = "20"
            params["-a"]["power"] = "0"

        else:  #scipy
            #set up the interpolation grid
            tix = np.linspace(minx, maxx, deltx * 2)
            tiy = np.linspace(miny, maxy, delty * 2)
            XI, YI = np.meshgrid(tix, tiy)

        if gdal:
            for clr in ["raw", "visible"]:

                params["src_datasource"] = folder + "_" + clr + "fruit.vrt"
                params["dst_filename"] = folder + "_" + clr + "fruit.tiff"

                #print(params)
                # build gdal_grid request
                text = grd.build_gdal_grid_string(params, outtext=False)
                print(text)
                text = ["gdal_grid"] + text
                print(text)

                # call gdal_grid
                print("Interpolating for count " + clr)
                out, err = utils.run_tool(text)
                print("output" + out)
                if err:
                    print("error:" + err)
                    raise Exception("gdal grid failed")
        else:
            for clr in ["raw_fruit_count", "visible_fruit_per_m"]:
                rbf = Rbf(df['lon'].values, df['lat'].values, df[clr].values)
                ZI = rbf(XI, YI)
                print()

        # load the raster into numpy
        d = gdal.Open(folder + "_rawfruit.tiff")
        band = d.GetRasterBand(1)
        # apply index
        new_r_indexed_raw = gdalIO.apply_index_to_single_band(band, row_index)
        d = None

        d = gdal.Open(folder + "_visiblefruit.tiff")
        band = d.GetRasterBand(1)
        # apply index
        new_r_indexed_visible = gdalIO.apply_index_to_single_band(
            band, row_index)
        d = None

        stats = {}

        for clr in ["a", "b", "c", "d"]:
            params["src_datasource"] = folder + "_" + clr + ".vrt"
            params["dst_filename"] = folder + "_" + clr + ".tiff"

            # build gdal_grid request
            text = grd.build_gdal_grid_string(params, outtext=False)
            print(text)
            text = ["gdal_grid"] + text
            print(text)

            # call gdal_grid
            print("interpolating for color " + clr)
            out, err = utils.run_tool(text)
            print("output" + out)
            if err:
                print("error:" + err)
                raise Exception("gdal grid failed")

            # load the raster into numpy
            d = gdal.Open(folder + "_" + clr + ".tiff")
            band = d.GetRasterBand(1)
            # apply index
            new_r_indexed = gdalIO.apply_index_to_single_band(
                band, row_index)  # this is the index from the rows
            d = None

            stats[clr] = {}

            for i in np.unique(row_indexed):  # get the row numbers

                area = math.pow(1 / area_multiplier, 2)

                # get a mask for the current row
                mask = row_indexed == i
                # statistics for current row

                # average, std, totalarea,total nonzero area, total zeroarea, total raw fruit count,
                # average raw fruit count, std raw fruit count ,total visible fruitxm, average visible fruitXm, std visible fruitXm

                # row_indexed is 2d, while new_r_indexed and mask are 1d

                stats[clr][i] = [
                    new_r_indexed[mask[0, :]].mean(),
                    new_r_indexed[mask[0, :]].std(),
                    new_r_indexed[mask[0, :]].shape[0] * area,
                    np.count_nonzero(new_r_indexed[mask[0, :]]) * area,
                    new_r_indexed[mask[0, :]][new_r_indexed[mask[0, :]] ==
                                              0].shape[0] * area,
                    new_r_indexed_raw[mask[0, :]].sum(),
                    new_r_indexed_raw[mask[0, :]].mean(),
                    new_r_indexed_raw[mask[0, :]].std(),
                    new_r_indexed_visible[mask[0, :]].sum(),
                    new_r_indexed_visible[mask[0, :]].mean(),
                    new_r_indexed_visible[mask[0, :]].std()
                ]

        return id, stats
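A minimal sketch, not part of the original workflow, showing how the per-row statistics collected above could be written out as one CSV per colorgrade; the column names simply follow the comment listing the 11 values, and `stats_to_csv` is a hypothetical helper.

    import csv

    def stats_to_csv(stats, folder):
        header = ["row", "avg", "std", "total_area", "nonzero_area", "zero_area",
                  "raw_count_sum", "raw_count_avg", "raw_count_std",
                  "visible_per_m_sum", "visible_per_m_avg", "visible_per_m_std"]
        for clr, rows in stats.items():
            with open(folder + "_" + clr + "_stats.csv", "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(header)
                for row_id, values in rows.items():
                    writer.writerow([row_id] + list(values))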
Example no. 8
    def tests_filter_by_step():

        # download data
        folder = "/vagrant/code/pysdss/data/output/text/"
        experimentfolder = "/vagrant/code/pysdss/experiments/interpolation/"

        id = utils.create_uniqueid()

        os.mkdir(folder + id)

        print(id)

        dowload_all_data_test(id, folder + id + "/")

        df = pd.read_csv(folder + id + "/" + id + ".csv")
        # strip and lower column names
        df.columns = df.columns.str.strip()
        df.columns = df.columns.str.lower()

        #for i in [2, 3, 4, 5, 6, 7, 8, 9, 10]:
        for i in [10, 30, 50]:
            # filter by step with the different option combinations and save keep/delete to csv

            print("step " + str(i))

            keep, delete = filter.filter_bystep(df,
                                                step=i,
                                                rand=False,
                                                first_last=False,
                                                rowname=None)
            keep.to_csv(folder + id + "/NOrandom_NOfirstlast_step" + str(i) +
                        "_keep.csv")
            delete.to_csv(folder + id + "/NOrandom_NOfirstlast_step" + str(i) +
                          "_delete.csv")

            keep, delete = filter.filter_bystep(df,
                                                step=i,
                                                rand=False,
                                                first_last=True,
                                                rowname="row")
            keep.to_csv(folder + id + "/NOrandom_firstlast_step" + str(i) +
                        "_keep.csv")
            delete.to_csv(folder + id + "/NOrandom_firstlast_step" + str(i) +
                          "_delete.csv")

            keep, delete = filter.filter_bystep(df,
                                                step=i,
                                                rand=True,
                                                first_last=False,
                                                rowname=None)
            keep.to_csv(folder + id + "/random_NOfirstlast_step" + str(i) +
                        "_keep.csv")
            delete.to_csv(folder + id + "/random_NOfirstlast_step" + str(i) +
                          "_delete.csv")

            keep, delete = filter.filter_bystep(df,
                                                step=i,
                                                rand=True,
                                                first_last=True,
                                                rowname="row")
            keep.to_csv(folder + id + "/random_firstlast_step" + str(i) +
                        "_keep.csv")
            delete.to_csv(folder + id + "/random_firstlast_step" + str(i) +
                          "_delete.csv")

        # first_last=True without a rowname is invalid and must raise a ValueError
        try:
            keep, delete = filter.filter_bystep(df,
                                                step=i,
                                                rand=True,
                                                first_last=True,
                                                rowname=None)
        except ValueError as e:
            print("test passed for first_last and no row name " + str(e))