Example no. 1
0
    def __init__(self, inputFilename, delimiter, sort_col, logname):
        """Initialize a sorter for a large CSV file.

        @param inputFilename: full pathname of a CSV file containing records
                 uniquely identified by the first field in a record.
        @param delimiter: separator between fields of a record
        @param sort_col: column name or numeric index of the field to sort on
        @param logname: basename for the message logger and its log file
        @note: Original data file is composed of ~100 chunks of records, each
             chunk sorted on the gbif ID, in position 0 of each record.
        """
        self.messyfile = inputFilename
        self.delimiter = delimiter
        self.sort_col = sort_col
        # Interpret sort_col as a numeric column index when possible;
        # otherwise leave sort_idx as None (sort_col is presumably a column
        # name to be resolved against the header later).
        try:
            self.sort_idx = int(sort_col)
        except (TypeError, ValueError):
            self.sort_idx = None
        self.header = []

        tmp, _ = os.path.splitext(self.messyfile)
        self._basepath, self._dataname = os.path.split(tmp)

        # BUG FIX: 'pth' was referenced below without ever being assigned
        # (NameError at runtime); derive it from the input file's directory.
        pth = self._basepath

        logfname = os.path.join(pth, '{}.log'.format(logname))
        self._log = get_logger(logname, logfname)

        self.pth = pth
        self.splitBase = os.path.join(pth, 'split_{}'.format(self._dataname))
        self.tidyfile = os.path.join(pth, 'tidy_{}.csv'.format(self._dataname))
        self._files = {}
Example no. 2
0
def intersect_csv_and_shapefiles(in_csv_filename, geodata1, geodata2,
                                 ancillary_path, out_csv_filename, from_gbif):
    """Intersect the records in the csv file with the two provided shapefiles.

    Args:
        in_csv_filename (str): Path to a CSV file of records.
        geodata1 (str): Path to the first shapefile to check for
            intersection.
        geodata2 (str): Path to the second shapefile to check for
            intersection.
        ancillary_path (str): Path to ancillary data passed through to the
            point-in-polygon intersection.
        out_csv_filename (str): Path for output CSV records.
        from_gbif (bool): Whether the input records originated from GBIF
            (forwarded to ``update_point_in_polygons``).
    """
    # The log file is written next to the output CSV, named after it.
    pth, basefname = os.path.split(out_csv_filename)
    logbasename, _ = os.path.splitext(basefname)
    logfname = os.path.join(pth, '{}.log'.format(logbasename))
    logger = get_logger(logbasename, logfname)
    bf = BisonFiller(log=logger)
    # Pass 4 of CSV transform, final step, point-in-polygon intersection
    bf.update_point_in_polygons(geodata1,
                                geodata2,
                                ancillary_path,
                                in_csv_filename,
                                out_csv_filename,
                                from_gbif=from_gbif)
    # Random pause — presumably to stagger concurrent workers; TODO confirm
    # it is still needed before removing.
    sleep(randint(0, 10))
    print(' - {}'.format(out_csv_filename))
Example no. 3
0
    def __init__(self, infname, indelimiter, group_col, logname):
        """Split a large CSV file into individual files grouped by one column.

        Args:
            infname: full pathname to a CSV file containing records to be
                grouped on the value in a field of the records
            indelimiter: separator between fields of a record
            group_col: the column name (for files with a header) or column
                index for the field to be used for grouping
            logname: the basename for a message logger

        Raises:
            Exception: if group_col is neither a valid column index nor a
                field name present in the file header.
        """
        self.messyfile = infname
        self.indelimiter = indelimiter
        self.group_col = group_col
        self.header = self._get_header()
        self.group_idx = None
        # Accept either a numeric column index or a header field name.
        try:
            self.group_idx = int(group_col)
        except (TypeError, ValueError):
            try:
                self.group_idx = self.header.index(group_col)
            except ValueError as err:
                # Chain the cause so the lookup failure is not lost.
                raise Exception('Field {} does not exist in header {}'.format(
                    self.group_col, self.header)) from err

        tmp, _ = os.path.splitext(self.messyfile)
        self._basepath, self._dataname = os.path.split(tmp)

        # BUG FIX: 'pth' was referenced below without ever being assigned
        # (NameError at runtime); derive it from the input file's directory.
        pth = self._basepath

        logfname = os.path.join(pth, '{}.log'.format(logname))
        self._log = get_logger(logname, logfname)

        self.pth = pth
        self._files = {}
Example no. 4
0
 def reset_logger(self, outdir, logname):
     """Replace this object's logger with a fresh one writing into *outdir*.

     Args:
         outdir: directory in which the new '<logname>.log' file is created.
         logname: basename used for both the logger and its log file.
     """
     # Drop the old logger before constructing its replacement.
     self._log = None
     log_path = os.path.join(outdir, '{}.log'.format(logname))
     self._log = get_logger(logname, log_path)