def main():
    """Print the attribute rows of all vector maps registered in a STVDS.

    Reads the ``input``, ``where``, ``columns``, ``t_where``, ``layer`` and
    ``separator`` options, runs ``v.db.select`` for every registered map and
    prints each attribute row prefixed with the start and end time of the
    map. The column header line is printed once and again whenever it
    differs from the previously printed one.

    Calls ``grass.fatal`` when ``v.db.select`` returns no output for a map.
    """
    # Get the options
    stvds_name = options["input"]
    where = options["where"]
    columns = options["columns"]
    tempwhere = options["t_where"]
    layer = options["layer"]
    separator = grass.separator(options["separator"])

    # Blank option values mean "not set" for v.db.select
    blank_values = ("", " ", "\n")
    if where in blank_values:
        where = None

    if columns in blank_values:
        columns = None

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.open_old_stds(stvds_name, "stvds")

    rows = sp.get_registered_maps(
        "name,layer,mapset,start_time,end_time", tempwhere, "start_time", None
    )

    col_names = ""
    if not rows:
        return

    for row in rows:
        vector_name = "%s@%s" % (row["name"], row["mapset"])
        # In case a layer is defined in the vector dataset, it overrides the
        # layer option for this map only. A per-map variable is used so the
        # override does not leak into subsequent maps without a layer.
        map_layer = row["layer"] if row["layer"] else layer

        select = grass.read_command(
            "v.db.select",
            map=vector_name,
            layer=map_layer,
            columns=columns,
            separator="%s" % (separator),
            where=where,
        )

        if not select:
            grass.fatal(
                _("Unable to run v.db.select for vector map <%s> "
                  "with layer %s") % (vector_name, map_layer)
            )

        # The first non-blank line holds the column names
        header_pending = True
        for entry in select.split("\n"):
            if entry.strip() == "":
                continue
            if header_pending:
                header_pending = False
                # print the column names in case they change
                col_names_new = "start_time%send_time%s%s" % (
                    separator, separator, entry)
                if col_names != col_names_new:
                    col_names = col_names_new
                    print(col_names)
            elif row["end_time"]:
                print("%s%s%s%s%s" % (row["start_time"], separator,
                                      row["end_time"], separator, entry))
            else:
                # No end time: keep the column but leave it empty
                print("%s%s%s%s" % (row["start_time"], separator,
                                    separator, entry))
def main():
    """Print univariate statistics for the maps of a "strds" dataset.

    Reads the ``input``, ``where`` and ``separator`` options together with
    the ``e`` and ``s`` flags and forwards them to
    ``tgis.print_gridded_dataset_univar_statistics``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    where_clause = options["where"]
    show_extended = flags["e"]
    skip_header = flags["s"]
    field_sep = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    call_args = (
        "strds",
        dataset_name,
        where_clause,
        show_extended,
        skip_header,
        field_sep,
    )
    tgis.print_gridded_dataset_univar_statistics(*call_args)
def main():
    """Sample the input datasets with a sampler dataset.

    Reads the ``inputs``, ``sample``, ``samtype``, ``intype``, ``separator``
    and ``method`` options plus the ``c`` and ``s`` flags and passes them to
    ``tgis.sample_stds_by_stds_topology``.
    """
    # Collect the user supplied settings
    input_names = options["inputs"]
    sampler_name = options["sample"]
    sampler_type = options["samtype"]
    input_type = options["intype"]
    field_sep = grass.separator(options["separator"])
    sampling_method = options["method"]
    print_header = flags["c"]
    use_spatial = flags["s"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.sample_stds_by_stds_topology(
        input_type,
        sampler_type,
        input_names,
        sampler_name,
        print_header,
        field_sep,
        sampling_method,
        use_spatial,
        True,
    )
def main():
    """List the maps registered in a "stvds" dataset.

    Reads the ``input``, ``columns``, ``order``, ``where``, ``separator``,
    ``method`` and ``output`` options plus the ``u`` flag and passes them to
    ``tgis.list_maps_of_stds``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    column_names = options["columns"]
    sort_order = options["order"]
    where_clause = options["where"]
    field_sep = grass.separator(options["separator"])
    listing_method = options["method"]
    header_flag = flags["u"]
    out_path = options["output"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.list_maps_of_stds(
        "stvds",
        dataset_name,
        column_names,
        sort_order,
        where_clause,
        field_sep,
        listing_method,
        header_flag,
        outpath=out_path,
    )
def main():
    """Print univariate statistics for a vector dataset.

    Reads the ``input``, ``twhere``, ``layer``, ``type``, ``column``,
    ``where`` and ``separator`` options plus the ``e`` and ``s`` flags and
    forwards them to ``tgis.print_vector_dataset_univar_statistics``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    temporal_where = options["twhere"]
    layer_name = options["layer"]
    feature_type = options["type"]
    column_name = options["column"]
    attribute_where = options["where"]
    show_extended = flags["e"]
    header_flag = flags["s"]
    field_sep = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    tgis.print_vector_dataset_univar_statistics(
        dataset_name,
        temporal_where,
        layer_name,
        feature_type,
        column_name,
        attribute_where,
        show_extended,
        header_flag,
        field_sep,
    )
def main():
    """List the maps registered in a "stvds" dataset.

    ``grass.temporal`` is imported inside the function (lazy import). The
    ``input``, ``columns``, ``order``, ``where``, ``separator``, ``method``
    and ``output`` options plus the ``u`` flag are passed on to
    ``tgis.list_maps_of_stds``.
    """
    # lazy imports
    import grass.temporal as tgis

    # Collect the user supplied settings
    stvds = options["input"]
    cols = options["columns"]
    ordering = options["order"]
    sql_where = options["where"]
    sep = grass.separator(options["separator"])
    list_method = options["method"]
    u_flag = flags["u"]
    target = options["output"]

    # Make sure the temporal database exists
    tgis.init()

    positional = ("stvds", stvds, cols, ordering, sql_where, sep,
                  list_method, u_flag)
    tgis.list_maps_of_stds(*positional, outpath=target)
def main():
    """Register maps in a space time dataset.

    Reads the ``input``, ``maps``, ``type``, ``file``, ``separator``,
    ``start``, ``end``, ``unit`` and ``increment`` options plus the ``i``
    flag and passes them as keyword arguments to
    ``tgis.register_maps_in_space_time_dataset``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    map_names = options["maps"]
    map_type = options["type"]
    map_file = options["file"]
    field_sep = grass.separator(options["separator"])
    start_time = options["start"]
    end_time = options["end"]
    time_unit = options["unit"]
    time_increment = options["increment"]
    create_interval = flags["i"]

    # Make sure the temporal database exists
    tgis.init()

    # Register maps
    tgis.register_maps_in_space_time_dataset(
        type=map_type,
        name=dataset_name,
        maps=map_names,
        file=map_file,
        start=start_time,
        end=end_time,
        unit=time_unit,
        increment=time_increment,
        dbif=None,
        interval=create_interval,
        fs=field_sep,
    )
def main():
    """Sample the input datasets with a sampler dataset.

    ``grass.temporal`` is imported inside the function (lazy import). The
    ``inputs``, ``sample``, ``samtype``, ``intype``, ``separator`` and
    ``method`` options plus the ``c`` and ``s`` flags are passed on to
    ``tgis.sample_stds_by_stds_topology``.
    """
    # lazy imports
    import grass.temporal as tgis

    # Collect the user supplied settings
    sampled = options["inputs"]
    sampling_stds = options["sample"]
    sampling_type = options["samtype"]
    sampled_type = options["intype"]
    sep = grass.separator(options["separator"])
    relation_method = options["method"]
    c_flag = flags["c"]
    s_flag = flags["s"]

    # Make sure the temporal database exists
    tgis.init()

    arguments = (sampled_type, sampling_type, sampled, sampling_stds,
                 c_flag, sep, relation_method, s_flag, True)
    tgis.sample_stds_by_stds_topology(*arguments)
def main():
    """Print univariate statistics for the maps of a "str3ds" dataset.

    Reads the ``input``, ``output``, ``where`` and ``separator`` options
    plus the ``e`` and ``s`` flags. An empty ``output`` or ``"-"`` is
    replaced by ``None`` before everything is forwarded to
    ``tgis.print_gridded_dataset_univar_statistics``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    out_target = options["output"]
    where_clause = options["where"]
    show_extended = flags["e"]
    skip_header = flags["s"]
    field_sep = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    # Neither an empty value nor "-" names an output target
    if not out_target or out_target == "-":
        out_target = None

    tgis.print_gridded_dataset_univar_statistics(
        "str3ds",
        dataset_name,
        out_target,
        where_clause,
        show_extended,
        skip_header,
        field_sep,
    )
def main():
    """Print univariate statistics for the maps of a "strds" dataset.

    Reads the ``input``, ``output``, ``where`` and ``separator`` options
    plus the ``e``, ``s`` and ``r`` flags. An empty ``output`` or ``"-"`` is
    replaced by ``None`` before everything is forwarded to
    ``tgis.print_gridded_dataset_univar_statistics``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    out_target = options["output"]
    where_clause = options["where"]
    show_extended = flags["e"]
    skip_header = flags["s"]
    use_rast_region = bool(flags["r"])
    field_sep = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    # Neither an empty value nor "-" names an output target
    if not out_target or out_target == "-":
        out_target = None

    call_args = (
        "strds",
        dataset_name,
        out_target,
        where_clause,
        show_extended,
        skip_header,
        field_sep,
        use_rast_region,
    )
    tgis.print_gridded_dataset_univar_statistics(*call_args)
def main():
    """Print univariate statistics for a vector dataset.

    Reads the ``input``, ``output``, ``twhere``, ``layer``, ``type``,
    ``column``, ``where`` and ``separator`` options plus the ``e`` and ``s``
    flags. An empty ``output`` or ``"-"`` is replaced by ``None`` before
    everything is forwarded to
    ``tgis.print_vector_dataset_univar_statistics``.
    """
    # Collect the user supplied settings
    dataset_name = options["input"]
    out_target = options["output"]
    temporal_where = options["twhere"]
    layer_name = options["layer"]
    feature_type = options["type"]
    column_name = options["column"]
    attribute_where = options["where"]
    show_extended = flags["e"]
    header_flag = flags["s"]
    field_sep = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    # Neither an empty value nor "-" names an output target
    if not out_target or out_target == "-":
        out_target = None

    tgis.print_vector_dataset_univar_statistics(
        dataset_name,
        out_target,
        temporal_where,
        layer_name,
        feature_type,
        column_name,
        attribute_where,
        show_extended,
        header_flag,
        field_sep,
    )
def main():
    """Create curved flow lines between points listed in a flow file.

    Straight arcs are built with ``v.net``, their lengths are read with
    ``v.to.db``, points along the curves are generated with ``v.segment``
    and connected again with ``v.net``. The result is turned into polylines
    and given an attribute table that is filled from the SQL file returned
    by ``process_infile``.

    The names of the temporary maps and rule files are stored in module
    level globals.
    """
    global tmplines, tmplines2, tmppoints, vseginfile, vnetinfile

    point_map = options['input']
    flow_path = options['flow_input_file']
    offset_min = float(options['minimum_offset'])
    offset_max = float(options['maximum_offset'])
    n_vertices = int(options['vertices'])
    result_map = options['output']
    field_sep = gscript.separator(options['separator'])
    same_ok = flags['s']
    has_header = True

    # The process id keeps the temporary map names unique per run
    process_id = os.getpid()
    tmplines = 'tmp_vnetcurvedarcs_tmplines_%d' % process_id
    tmplines2 = 'tmp_vnetcurvedarcs_tmplines2_%d' % process_id
    tmppoints = 'tmp_vnetcurvedarcs_tmppoints_%d' % process_id

    # Keyword arguments shared by most module calls below
    common = {'overwrite': True, 'quiet': True}

    vnetinfile, sqlfile = process_infile(flow_path, field_sep, has_header,
                                         same_ok, result_map)

    gscript.message(_("Creating straight flow lines..."))
    gscript.run_command('v.net', points=point_map, operation='arcs',
                        file_=vnetinfile, out=tmplines, **common)

    length_report = gscript.read_command('v.to.db', flags='p', map_=tmplines,
                                         option='length', quiet=True)

    # Map category -> line length for all categories greater than zero
    lineinfo = {}
    for record in length_report.splitlines():
        fields = record.split('|')
        category = int(fields[0])
        if category > 0:
            lineinfo[category] = float(fields[1])

    vseginfile, maxcat = write_segmentdefs(lineinfo, offset_min, offset_max,
                                           n_vertices)

    gscript.message(_("Creating points of curved lines..."))
    gscript.run_command('v.segment', input_=tmplines, out=tmppoints,
                        rules=vseginfile, **common)

    gscript.message(_("Creating curved lines from points..."))
    vnetinfile = write_segarcdefs(lineinfo, maxcat)
    gscript.run_command('v.net', points=tmppoints, output=tmplines,
                        operation='arcs', file_=vnetinfile, **common)

    gscript.run_command('v.extract', input_=tmplines, output=tmplines2,
                        layer=1, **common)

    gscript.message(_("Creating polylines..."))
    gscript.run_command('v.build.polylines', input_=tmplines2,
                        output=result_map, cats='multi', **common)

    gscript.run_command(
        'v.db.addtable',
        map_=result_map,
        columns="from_node int, to_node int, volume double precision",
        quiet=True,
        overwrite=True)

    gscript.run_command('db.execute', input_=sqlfile, quiet=True)
def main():
    """Import a CSV file with latitude/longitude columns as vector points.

    The options are obtained from ``gs.parser()``. Each input row is read
    with ``csv.DictReader``, its longitude/latitude pair is transformed
    from the ``crs`` option to the CRS returned by ``get_current_crs()``,
    and the result is stored in the ``X`` and ``Y`` fields of a temporary
    CSV file which is then imported with ``v.in.ascii``.

    Existing ``X``/``Y`` columns are overwritten; missing ones are appended.
    At most ``limit`` rows are processed when that option is set.

    Returns:
        0 after ``v.in.ascii`` was run.
    """
    # Allow more locals in the main.
    # pylint: disable=too-many-locals
    options, unused_flags = gs.parser()

    # Requires pyproj >= 2.2.0
    # Lazy importing pyproj because it is not a dependency of GRASS GIS.
    from pyproj import Transformer  # pylint: disable=import-outside-toplevel

    to_crs = get_current_crs()
    # We assign xy as result, so we need to keep the en ordering.
    transformer = Transformer.from_crs(
        options["crs"], to_crs, always_xy=True, skip_equivalent=True
    )

    input_filename = options["input"]
    output_map = options["output"]
    lat_name = options["latitude"]
    lon_name = options["longitude"]
    separator = gs.separator(options["separator"])
    integer_names = options["int_columns"].split(",")
    float_names = options["real_columns"].split(",")
    # Lat and lon as doubles because we require that anyway.
    float_names.extend([lat_name, lon_name])
    if options["limit"]:
        limit = int(options["limit"])
    else:
        limit = None
    assert limit is None or limit >= 1, "Check limit option definition"

    fieldnames = get_header_from_csv(input_filename, separator)
    # If there is X or Y already, we will replace its content. Each column
    # is checked on its own so that a file which has only one of the two
    # gets the other one appended instead of failing on the index lookup.
    for coordinate_name in ("X", "Y"):
        if coordinate_name not in fieldnames:
            fieldnames.append(coordinate_name)
            float_names.append(coordinate_name)
    # One-based indices in v.in.ascii
    x_index = fieldnames.index("X") + 1
    y_index = fieldnames.index("Y") + 1

    tmp_file = get_tmp_file_name()

    with open(input_filename) as infile, open(tmp_file, mode="w") as outfile:
        reader = csv.DictReader(infile, delimiter=separator)
        writer = csv.DictWriter(
            outfile,
            fieldnames=fieldnames,
            delimiter=separator,
            quotechar='"',
            lineterminator="\n",
        )
        writer.writeheader()
        for i, row in enumerate(reader):
            if limit and i >= limit:
                break
            lon = float(row[lon_name])
            lat = float(row[lat_name])
            x, y = transformer.transform(lon, lat)
            row["X"] = x
            row["Y"] = y
            writer.writerow(row)

    sql_columns = names_to_sql_columns(fieldnames, float_names, integer_names)

    gs.run_command(
        "v.in.ascii",
        input=tmp_file,
        output=output_map,
        format="point",
        separator=separator,
        text='"',
        skip=1,  # the header line written above
        columns=sql_columns,
        x=x_index,
        y=y_index,
    )

    return 0
def main():
    """Classify segments with an ensemble of caret models run through R.

    The function writes an R script to a temporary file, runs it with
    ``Rscript`` and optionally loads the results back:

    * feature and training data come either from vector map attribute
      tables (exported with ``v.db.select``) or from files,
    * for every requested classifier a caret model is trained with
      repeated cross-validation,
    * per weighting mode a weighted vote over the classifier predictions is
      stored in the results table,
    * depending on the options the results are written to CSV, joined to
      the attribute table of the segments map and/or used to reclassify a
      raster segments map.

    The names of temporary files and tables are stored in module level
    globals. ``gscript.fatal`` is called when the R script fails.
    """
    global allmap
    global trainmap
    global feature_vars
    global training_vars
    global model_output_csv
    global model_output_csvt
    global temptable
    global r_commands
    global reclass_files

    allmap = trainmap = feature_vars = training_vars = None
    model_output_csv = model_output_csvt = temptable = r_commands = None
    reclass_files = None

    # R function returning the class with the highest summed weight and
    # that summed weight
    voting_function = "voting <- function (x, w) {\n"
    voting_function += "res <- tapply(w, x, sum, simplify = TRUE)\n"
    voting_function += "maj_class <- as.numeric(names(res)[which.max(res)])\n"
    voting_function += "prob <- as.numeric(res[which.max(res)])\n"
    voting_function += "return(list(maj_class=maj_class, prob=prob))\n}"

    # R expressions computing the classifier weights per weighting mode
    weighting_functions = {}
    weighting_functions[
        'smv'] = "weights <- rep(1/length(weighting_base), length(weighting_base))"
    weighting_functions[
        'swv'] = "weights <- weighting_base/sum(weighting_base)"
    weighting_functions[
        'bwwv'] = "weights <- 1-(max(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base))"
    weighting_functions[
        'qbwwv'] = "weights <- ((min(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base)))**2"

    # R packages needed per caret method
    packages = {
        'svmRadial': ['kernlab'],
        'svmLinear': ['kernlab'],
        'svmPoly': ['kernlab'],
        'rf': ['randomForest'],
        'rpart': ['rpart'],
        'C5.0': ['C50'],
        'xgbTree': ['xgboost', 'plyr']
    }

    # R snippet installing a package (three %s placeholders: package name)
    install_package = "if(!is.element('%s', installed.packages()[,1])){\n"
    install_package += "cat('\\n\\nInstalling %s package from CRAN\n')\n"
    install_package += "if(!file.exists(Sys.getenv('R_LIBS_USER'))){\n"
    install_package += "dir.create(Sys.getenv('R_LIBS_USER'), recursive=TRUE)\n"
    install_package += ".libPaths(Sys.getenv('R_LIBS_USER'))}\n"
    install_package += "chooseCRANmirror(ind=1)\n"
    install_package += "install.packages('%s', dependencies=TRUE)}"

    if options['segments_map']:
        allfeatures = options['segments_map']
        segments_layer = options['segments_layer']
        allmap = True
    else:
        allfeatures = options['segments_file']
        allmap = False

    if options['training_map']:
        training = options['training_map']
        training_layer = options['training_layer']
        trainmap = True
    else:
        training = options['training_file']
        trainmap = False

    classcol = options['train_class_column']
    output_classcol = options['output_class_column']
    output_probcol = None
    if options['output_prob_column']:
        output_probcol = options['output_prob_column']
    classifiers = options['classifiers'].split(',')
    weighting_modes = options['weighting_modes'].split(',')
    weighting_metric = options['weighting_metric']
    processes = int(options['processes'])
    folds = options['folds']
    partitions = options['partitions']
    tunelength = options['tunelength']
    separator = gscript.separator(options['separator'])
    tunegrids = literal_eval(
        options['tunegrids']) if options['tunegrids'] else {}

    # Paths handed to R use forward slashes
    classification_results = None
    if options['classification_results']:
        classification_results = options['classification_results'].replace(
            "\\", "/")

    model_details = None
    if options['model_details']:
        model_details = options['model_details'].replace("\\", "/")

    raster_segments_map = None
    if options['raster_segments_map']:
        raster_segments_map = options['raster_segments_map']

    classified_map = None
    if options['classified_map']:
        classified_map = options['classified_map']

    r_script_file = None
    if options['r_script_file']:
        r_script_file = options['r_script_file']

    accuracy_file = None
    if options['accuracy_file']:
        accuracy_file = options['accuracy_file'].replace("\\", "/")

    bw_plot_file = None
    if options['bw_plot_file']:
        bw_plot_file = options['bw_plot_file'].replace("\\", "/")

    if allmap:
        feature_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command('v.db.select',
                            map_=allfeatures,
                            file_=feature_vars,
                            layer=segments_layer,
                            quiet=True,
                            overwrite=True)
    else:
        feature_vars = allfeatures.replace("\\", "/")

    if trainmap:
        training_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command('v.db.select',
                            map_=training,
                            file_=training_vars,
                            layer=training_layer,
                            quiet=True,
                            overwrite=True)
    else:
        training_vars = training.replace("\\", "/")

    r_commands = gscript.tempfile().replace("\\", "/")

    # The context manager closes the script file even if one of the lookups
    # below fails (e.g. an unknown classifier or weighting mode).
    with open(r_commands, 'w') as r_file:

        if processes > 1:
            install = install_package % ('doParallel', 'doParallel',
                                         'doParallel')
            r_file.write(install)
            r_file.write("\n")

        # automatic installation of missing R packages
        install = install_package % ('caret', 'caret', 'caret')
        r_file.write(install)
        r_file.write("\n")
        install = install_package % ('e1071', 'e1071', 'e1071')
        r_file.write(install)
        r_file.write("\n")
        for classifier in classifiers:
            # knn is included in caret
            if classifier == "knn" or classifier == "knn1":
                continue
            for package in packages[classifier]:
                install = install_package % (package, package, package)
                r_file.write(install)
                r_file.write("\n")
        r_file.write("\n")
        r_file.write('require(caret)')
        r_file.write("\n")
        r_file.write(
            'features <- read.csv("%s", sep="%s", header=TRUE, row.names=1)' %
            (feature_vars, separator))
        r_file.write("\n")
        r_file.write(
            'training <- read.csv("%s", sep="%s", header=TRUE, row.names=1)' %
            (training_vars, separator))
        r_file.write("\n")
        r_file.write("training$%s <- as.factor(training$%s)" %
                     (classcol, classcol))
        r_file.write("\n")
        if processes > 1:
            r_file.write("library(doParallel)")
            r_file.write("\n")
            r_file.write("registerDoParallel(cores = %d)" % processes)
            r_file.write("\n")
        r_file.write(
            "MyFolds.cv <- createMultiFolds(training$%s, k=%s, times=%s)" %
            (classcol, folds, partitions))
        r_file.write("\n")
        r_file.write(
            "MyControl.cv <- trainControl(method='repeatedCV', index=MyFolds.cv)")
        r_file.write("\n")
        r_file.write("fmla <- %s ~ ." % classcol)
        r_file.write("\n")
        r_file.write("models.cv <- list()")
        r_file.write("\n")
        for classifier in classifiers:
            if classifier == 'knn1':
                # knn1 is the caret 'knn' method with k fixed to 1
                r_file.write("Grid <- expand.grid(k=1)")
                r_file.write("\n")
                r_file.write(
                    "knn1Model.cv <- train(fmla, training, method='knn', trControl=MyControl.cv, tuneGrid=Grid)"
                )
                r_file.write("\n")
                r_file.write("models.cv$knn1 <- knn1Model.cv")
                r_file.write("\n")
            else:
                if classifier in tunegrids:
                    r_file.write("Grid <- expand.grid(%s)" %
                                 tunegrids[classifier])
                    r_file.write("\n")
                    r_file.write(
                        "%sModel.cv <- train(fmla,training,method='%s', trControl=MyControl.cv, tuneGrid=Grid)"
                        % (classifier, classifier))
                else:
                    r_file.write(
                        "%sModel.cv <- train(fmla,training,method='%s', trControl=MyControl.cv, tuneLength=%s)"
                        % (classifier, classifier, tunelength))
                r_file.write("\n")
                r_file.write("models.cv$%s <- %sModel.cv" %
                             (classifier, classifier))
                r_file.write("\n")

        r_file.write("if (length(models.cv)>1) {")
        r_file.write("\n")
        r_file.write("resamps.cv <- resamples(models.cv)")
        r_file.write("\n")
        r_file.write(
            "accuracy_means <- as.vector(apply(resamps.cv$values[seq(2,length(resamps.cv$values), by=2)], 2, mean))"
        )
        r_file.write("\n")
        r_file.write(
            "kappa_means <- as.vector(apply(resamps.cv$values[seq(3,length(resamps.cv$values), by=2)], 2, mean))"
        )
        r_file.write("\n")
        r_file.write("} else {")
        r_file.write("\n")
        r_file.write("resamps.cv <- models.cv[[1]]$resample")
        r_file.write("\n")
        r_file.write("accuracy_means <- mean(resamps.cv$Accuracy)")
        r_file.write("\n")
        r_file.write("kappa_means <- mean(resamps.cv$Kappa)")
        r_file.write("\n")
        r_file.write("}")
        r_file.write("\n")
        r_file.write("predicted <- data.frame(predict(models.cv, features))")
        r_file.write("\n")
        # Flag i: keep the individual classifier predictions in the results
        if flags['i']:
            r_file.write(
                "resultsdf <- data.frame(id=rownames(features), predicted)")
        else:
            r_file.write("resultsdf <- data.frame(id=rownames(features))")
        r_file.write("\n")
        r_file.write(voting_function)
        r_file.write("\n")

        if weighting_metric == 'kappa':
            r_file.write("weighting_base <- kappa_means")
        else:
            r_file.write("weighting_base <- accuracy_means")
        r_file.write("\n")
        for weighting_mode in weighting_modes:
            r_file.write(weighting_functions[weighting_mode])
            r_file.write("\n")
            r_file.write("weights <- weights / sum(weights)")
            r_file.write("\n")
            r_file.write("vote <- apply(predicted, 1, voting, w=weights)")
            r_file.write("\n")
            r_file.write(
                "vote <- as.data.frame(matrix(unlist(vote), ncol=2, byrow=TRUE))")
            r_file.write("\n")
            r_file.write("resultsdf$%s_%s <- vote$V1" %
                         (output_classcol, weighting_mode))
            r_file.write("\n")
            if len(classifiers) > 1:
                r_file.write("resultsdf$%s_%s <- vote$V2" %
                             (output_probcol, weighting_mode))
                r_file.write("\n")

        if allmap and not flags['f']:
            model_output = gscript.tempfile().replace("\\", "/")
            model_output_csv = model_output + '.csv'
            write_string = "write.csv(resultsdf, '%s'," % model_output_csv
            write_string += " row.names=FALSE, quote=FALSE)"
            r_file.write(write_string)
            r_file.write("\n")

        if classified_map:
            # One reclass rule file per result column
            reclass_files = {}
            if flags['i']:
                for classifier in classifiers:
                    tmpfilename = gscript.tempfile()
                    reclass_files[classifier] = tmpfilename.replace("\\", "/")
                    r_file.write(
                        "tempdf <- data.frame(resultsdf$id, resultsdf$%s)" %
                        (classifier))
                    r_file.write("\n")
                    r_file.write(
                        "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                    )
                    r_file.write("\n")
                    r_file.write(
                        "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                        % reclass_files[classifier])
                    r_file.write("\n")
            for weighting_mode in weighting_modes:
                tmpfilename = gscript.tempfile()
                reclass_files[weighting_mode] = tmpfilename.replace("\\", "/")
                r_file.write(
                    "tempdf <- data.frame(resultsdf$id, resultsdf$%s_%s)" %
                    (output_classcol, weighting_mode))
                r_file.write("\n")
                r_file.write(
                    "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                )
                r_file.write("\n")
                r_file.write(
                    "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                    % reclass_files[weighting_mode])
                r_file.write("\n")

        if classification_results:
            r_file.write(
                "write.csv(resultsdf, '%s', row.names=FALSE, quote=FALSE)" %
                classification_results)
            r_file.write("\n")
        if accuracy_file:
            r_file.write(
                "df_means <- data.frame(method=names(models.cv),accuracy=accuracy_means, kappa=kappa_means)"
            )
            r_file.write("\n")
            r_file.write(
                "write.csv(df_means, '%s', row.names=FALSE, quote=FALSE)" %
                accuracy_file)
            r_file.write("\n")
        if model_details:
            # Redirect the R output of the following commands to the file
            r_file.write("sink('%s')" % model_details)
            r_file.write("\n")
            r_file.write("cat('BEST TUNING VALUES\n')")
            r_file.write("\n")
            r_file.write("cat('******************************\n\n')")
            r_file.write("\n")
            r_file.write("lapply(models.cv, function(x) x$best)")
            r_file.write("\n")
            r_file.write("cat('\n')")
            r_file.write("\n")
            r_file.write("cat('\nSUMMARY OF RESAMPLING RESULTS\n')")
            r_file.write("\n")
            r_file.write("cat('******************************\n\n')")
            r_file.write("\n")
            r_file.write("summary(resamps.cv)")
            r_file.write("\n")
            r_file.write("cat('\n')")
            r_file.write("\n")
            r_file.write("cat('\nRESAMPLED CONFUSION MATRICES\n')")
            r_file.write("\n")
            r_file.write("cat('******************************\n\n')")
            r_file.write("\n")
            r_file.write(
                "conf.mat.cv <- lapply(models.cv, function(x) confusionMatrix(x))")
            r_file.write("\n")
            r_file.write("print(conf.mat.cv)")
            r_file.write("\n")
            r_file.write("cat('\nDETAILED CV RESULTS\n')")
            r_file.write("\n")
            r_file.write("cat('******************************\n\n')")
            r_file.write("\n")
            r_file.write("lapply(models.cv, function(x) x$results)")
            r_file.write("\n")
            r_file.write("sink()")
            r_file.write("\n")

        if bw_plot_file and len(classifiers) > 1:
            r_file.write("png('%s.png')" % bw_plot_file)
            r_file.write("\n")
            r_file.write("print(bwplot(resamps.cv))")
            r_file.write("\n")
            r_file.write("dev.off()")

    if r_script_file:
        shutil.copy(r_commands, r_script_file)

    gscript.message("Running R now. Following output is R output.")
    try:
        subprocess.check_call(
            ['Rscript', r_commands],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError:
        gscript.fatal(
            "There was an error in the execution of the R script.\nPlease check the R output."
        )

    gscript.message("Finished running R.")

    if allmap and not flags['f']:

        # The .csvt file lists the column types of the CSV file next to it
        model_output_csvt = model_output + '.csvt'
        temptable = 'classif_tmp_table_%d' % os.getpid()
        header_string = '"Integer"'
        if flags['i']:
            for classifier in classifiers:
                header_string += ',"Integer"'
        if len(classifiers) > 1:
            for weighting_mode in weighting_modes:
                header_string += ',"Integer"'
                header_string += ',"Real"'
        else:
            header_string += ',"Integer"'
        with open(model_output_csvt, 'w') as csvt_file:
            csvt_file.write(header_string)

        gscript.message("Loading results into attribute table")
        gscript.run_command('db.in.ogr',
                            input_=model_output_csv,
                            output=temptable,
                            overwrite=True,
                            quiet=True)
        index_creation = "CREATE INDEX idx_%s_cat" % temptable
        index_creation += " ON %s (id)" % temptable
        gscript.run_command('db.execute', sql=index_creation, quiet=True)
        # Every column except the first one is joined to the segments map
        columns = gscript.read_command('db.columns',
                                       table=temptable).splitlines()[1:]
        orig_cat = gscript.vector_db(allfeatures)[int(segments_layer)]['key']
        gscript.run_command('v.db.join',
                            map_=allfeatures,
                            column=orig_cat,
                            otable=temptable,
                            ocolumn='id',
                            subset_columns=columns,
                            quiet=True)

    if classified_map:
        # dict.items() works on Python 2 and 3; iteritems() is Python 2 only
        for classification, reclass_file in reclass_files.items():
            output_map = classified_map + '_' + classification
            gscript.run_command('r.reclass',
                                input=raster_segments_map,
                                output=output_map,
                                rules=reclass_file,
                                quiet=True)
def main():
    """Create curved flow lines between points listed in a flow file.

    The steps are: build straight arcs (``v.net``), read their lengths
    (``v.to.db``), create points along the curves (``v.segment``), connect
    those points (``v.net``), extract layer 1 (``v.extract``), build
    polylines (``v.build.polylines``) and finally add and fill the
    attribute table (``v.db.addtable``, ``db.execute``).

    Temporary map and file names are kept in module level globals.
    """
    global tmplines, tmplines2, tmppoints, vseginfile, vnetinfile

    # User supplied settings
    points_in = options["input"]
    flows_in = options["flow_input_file"]
    min_off = float(options["minimum_offset"])
    max_off = float(options["maximum_offset"])
    vertex_count = int(options["vertices"])
    lines_out = options["output"]
    sep = gscript.separator(options["separator"])
    allow_same = flags["s"]
    with_header = True

    # Temporary map names carry the process id
    suffix = os.getpid()
    tmplines = "tmp_vnetcurvedarcs_tmplines_%d" % suffix
    tmplines2 = "tmp_vnetcurvedarcs_tmplines2_%d" % suffix
    tmppoints = "tmp_vnetcurvedarcs_tmppoints_%d" % suffix

    vnetinfile, sqlfile = process_infile(
        flows_in, sep, with_header, allow_same, lines_out
    )

    gscript.message(_("Creating straight flow lines..."))
    gscript.run_command(
        "v.net",
        points=points_in,
        operation="arcs",
        file_=vnetinfile,
        out=tmplines,
        overwrite=True,
        quiet=True,
    )

    lengths_text = gscript.read_command(
        "v.to.db", flags="p", map_=tmplines, option="length", quiet=True
    )
    split_rows = (text_row.split("|") for text_row in lengths_text.splitlines())
    # Category -> length, only for categories greater than zero
    lineinfo = {
        int(parts[0]): float(parts[1]) for parts in split_rows if int(parts[0]) > 0
    }

    vseginfile, maxcat = write_segmentdefs(lineinfo, min_off, max_off, vertex_count)

    gscript.message(_("Creating points of curved lines..."))
    gscript.run_command(
        "v.segment",
        input_=tmplines,
        out=tmppoints,
        rules=vseginfile,
        overwrite=True,
        quiet=True,
    )

    gscript.message(_("Creating curved lines from points..."))
    vnetinfile = write_segarcdefs(lineinfo, maxcat)
    gscript.run_command(
        "v.net",
        points=tmppoints,
        output=tmplines,
        operation="arcs",
        file_=vnetinfile,
        overwrite=True,
        quiet=True,
    )
    gscript.run_command(
        "v.extract",
        input_=tmplines,
        output=tmplines2,
        layer=1,
        overwrite=True,
        quiet=True,
    )

    gscript.message(_("Creating polylines..."))
    gscript.run_command(
        "v.build.polylines",
        input_=tmplines2,
        output=lines_out,
        cats="multi",
        overwrite=True,
        quiet=True,
    )

    table_columns = "from_node int, to_node int, volume double precision"
    gscript.run_command(
        "v.db.addtable",
        map_=lines_out,
        columns=table_columns,
        quiet=True,
        overwrite=True,
    )
    gscript.run_command("db.execute", input_=sqlfile, quiet=True)
def test_unrecognized_separator():
    """An unknown separator name is returned by gs.separator unchanged"""
    resolved = gs.separator("apple")
    assert resolved == "apple"
def main(options, flags):
    """Compute statistics of STRDS values at vector features for given dates.

    For every date (taken from the ``date_column`` of the vector map or from
    the ``date`` option) ``t.rast.what`` is queried for the time window of
    length ``granularity`` before the date (after it with flag ``a``). The
    values returned for each feature are aggregated with every requested
    method via ``return_value``.

    The results are either collected and printed to standard output, or
    written to attribute columns of the vector map with ``v.db.update``
    (flags ``u`` / ``c``; ``c`` creates the columns first).

    Args:
        options: mapping of option names to their values.
        flags: mapping of flag names to their values.
    """
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find('@') != -1:
        invect = invect.split('@')[0]
    incol = options["date_column"]
    indate = options["date"]
    strds = options["strds"]
    if strds.find('@') != -1:
        strds_name = strds.split('@')[0]
    else:
        strds_name = strds
    output = options["output"]
    cols = options["columns"].split(',')
    mets = options["method"].split(',')
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])

    # stdout stays False only when the attribute table is updated
    stdout = False
    if output != '-' and flags['u']:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != '-' and flags['c']:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == '-' and (flags['u'] or flags['c']):
        output = invect
        gscript.warning(_("Attribute table of vector {name} will be updated"
                          "...").format(name=invect))
    else:
        stdout = True
    if flags['c']:
        # One new column per method, named <strds>_<method>
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module("v.db.addcolumn", map=invect,
                             columns="{col} double precision".format(
                                 col=colname))
            except CalledModuleError:
                # Format after translation so the message id stays constant
                gscript.fatal(_("Not possible to create column "
                                "{col}").format(col=colname))

    if output != '-' and len(cols) != len(mets):
        gscript.fatal(_("'columns' and 'method' options must have the same "
                        "number of elements"))
    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

    # td is the length of the time window: a timedelta for absolute time,
    # an integer for relative time
    if sp.get_temporal_type() == 'absolute':
        delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
        if tgis.gran_singular_unit(gran) in ['year', 'month']:
            delta = int(tgis.gran_to_gran(gran, '1 day', True))
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'day':
            delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'hour':
            td = timedelta(hours=delta)
        elif tgis.gran_singular_unit(gran) == 'minute':
            td = timedelta(minutes=delta)
        elif tgis.gran_singular_unit(gran) == 'second':
            td = timedelta(seconds=delta)
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(_("Input granularity is smaller or equal to the {iv}"
                            " STRDS granularity").format(iv=strds))
        td = int(gran)
    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))
    elif incol:
        try:
            dates = pymod.Module("db.select", flags='c', stdout_=PI,
                                 stderr_=PI,
                                 sql="SELECT DISTINCT {dc} from {vmap} order "
                                     "by {dc}".format(vmap=invect, dc=incol))
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        mydates = [indate]
        pymap = VectorTopo(invect)
        pymap.open('r')
        if len(pymap.dblinks) == 0:
            try:
                pymap.close()
                pymod.Module("v.db.addtable", map=invect)
            except CalledModuleError:
                dbif.close()
                # The previous message had two placeholders but only one
                # argument, which raised TypeError instead of reporting
                gscript.fatal(_("Unable to add table to vector map "
                                "<%s>") % invect)
        if pymap.is_open():
            pymap.close()
        qfeat = pymod.Module("v.category", stdout_=PI, stderr_=PI,
                             input=invect, option='print')
        myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ''
    for data in mydates:
        if sp.get_temporal_type() == 'absolute':
            fdata = datetime.strptime(data, dateformat)
        else:
            fdata = int(data)
        if flags['a']:
            # Window starts at the date and extends forward
            sdata = fdata + td
            mwhere = "start_time >= '{inn}' and end_time < " \
                     "'{out}'".format(inn=fdata, out=sdata)
        else:
            # Window ends at the date
            sdata = fdata - td
            mwhere = "start_time >= '{inn}' and end_time < " \
                     "'{out}'".format(inn=sdata, out=fdata)
        lines = None
        try:
            r_what = pymod.Module("t.rast.what", points=invect, strds=strds,
                                  layout='timerow', separator=separator,
                                  flags="v", where=mwhere, quiet=True,
                                  stdout_=PI, stderr_=PI)
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            # A failed query is treated like an empty result (handled below)
            pass
        if incol:
            # Only the features carrying this date are of interest
            try:
                qfeat = pymod.Module("db.select", flags='c', stdout_=PI,
                                     stderr_=PI,
                                     sql="SELECT DISTINCT cat from {vmap} "
                                         "where {dc}='{da}' order by "
                                         "cat".format(vmap=invect, da=data,
                                                      dc=incol))
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(_("db.select returned an error for date "
                                "{da}").format(da=data))
        if not lines and stdout:
            # No values at all: report '*' for every feature and method
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat, da=data,
                                                 sep=separator)
                for n in range(len(mets)):
                    outtxt += "{sep}{val}".format(val='*', sep=separator)
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    # The builtin float replaces the removed np.float alias;
                    # values start at the fifth field of each line
                    nvals = np.array(vals[4:]).astype(float)
                except ValueError:
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(di=vals[0],
                                                         da=data,
                                                         sep=separator)
                        for n in range(len(mets)):
                            outtxt += "{sep}{val}".format(val='*',
                                                          sep=separator)
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(di=vals[0], da=data,
                                                     sep=separator)
                for n in range(len(mets)):
                    result = return_value(nvals, mets[n])
                    if stdout:
                        outtxt += "{sep}{val}".format(val=result,
                                                      sep=separator)
                    else:
                        try:
                            if incol:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n],
                                             value=str(result),
                                             where="{dc}='{da}' AND cat="
                                                   "{ca}".format(da=data,
                                                                 ca=vals[0],
                                                                 dc=incol))
                            else:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n],
                                             value=str(result),
                                             where="cat={ca}".format(
                                                 ca=vals[0]))
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
def main():
    """Generate and run an R (caret) classification script, then import results.

    The function reads the module ``options``/``flags``, exports the segment
    and training attribute tables to text files when they are given as vector
    maps, writes an R script that (optionally) installs missing packages,
    tunes and trains one model per requested classifier, predicts the class
    of every segment and combines the classifiers by weighted voting.  The
    script is run with ``Rscript``.  Afterwards the per-segment results are
    joined to the attribute table of the segments map and/or turned into
    reclassified raster maps, depending on the options.

    Module-level globals are assigned so that code outside this function can
    see which temporary files and tables were created.

    Calls ``gscript.fatal`` when the R script exits with an error.
    """
    global allmap
    global trainmap
    global feature_vars
    global training_vars
    global model_output_csv
    global model_output_csvt
    global temptable
    global r_commands
    global reclass_files

    allmap = trainmap = feature_vars = training_vars = None
    model_output_csv = model_output_csvt = temptable = r_commands = None
    reclass_files = None

    def opt(name):
        """Return the value of option *name*, or None when it was not given."""
        return options[name] or None

    def opt_path(name):
        """Return option *name* with forward slashes, or None when not given."""
        value = options[name]
        return value.replace("\\", "/") if value else None

    # R function implementing the weighted vote over classifier predictions.
    voting_function = "voting <- function (x, w) {\n"
    voting_function += "res <- tapply(w, x, sum, simplify = TRUE)\n"
    voting_function += "maj_class <- as.numeric(names(res)[which.max(res)])\n"
    voting_function += "prob <- as.numeric(res[which.max(res)])\n"
    voting_function += "return(list(maj_class=maj_class, prob=prob))\n}"

    # R expressions computing the classifier weights for each weighting mode.
    weighting_functions = {
        "smv": "weights <- rep(1/length(weighting_base), length(weighting_base))",
        "swv": "weights <- weighting_base/sum(weighting_base)",
        "bwwv": "weights <- 1-(max(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base))",
        "qbwwv": "weights <- ((min(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base)))**2",
    }

    # R packages required by each known classifier.
    packages = {
        "svmRadial": ["kernlab"],
        "svmLinear": ["kernlab"],
        "svmPoly": ["kernlab"],
        "rf": ["randomForest"],
        "ranger": ["ranger", "dplyr"],
        "rpart": ["rpart"],
        "C5.0": ["C50"],
        "xgbTree": ["xgboost", "plyr"],
    }

    # R snippet installing a package from CRAN when it is missing.
    install_package = "if(!is.element('%s', installed.packages()[,1])){\n"
    install_package += "cat('\\n\\nInstalling %s package from CRAN')\n"
    install_package += "if(!file.exists(Sys.getenv('R_LIBS_USER'))){\n"
    install_package += "dir.create(Sys.getenv('R_LIBS_USER'), recursive=TRUE)\n"
    install_package += ".libPaths(Sys.getenv('R_LIBS_USER'))}\n"
    install_package += "chooseCRANmirror(ind=1)\n"
    install_package += "install.packages('%s', dependencies=TRUE)}"

    if options["segments_map"]:
        allfeatures = options["segments_map"]
        segments_layer = options["segments_layer"]
        allmap = True
    else:
        allfeatures = options["segments_file"]
        allmap = False

    if options["training_map"]:
        training = options["training_map"]
        training_layer = options["training_layer"]
        trainmap = True
    else:
        training = options["training_file"]
        trainmap = False

    classcol = opt("train_class_column")
    output_classcol = options["output_class_column"]
    output_probcol = opt("output_prob_column")
    classifiers = options["classifiers"].split(",")
    weighting_modes = options["weighting_modes"].split(",")
    weighting_metric = options["weighting_metric"]
    if len(classifiers) == 1:
        gscript.message("Only one classifier, so no voting applied")

    processes = int(options["processes"])
    folds = options["folds"]
    partitions = options["partitions"]
    tunelength = options["tunelength"]
    separator = gscript.separator(options["separator"])
    tunegrids = literal_eval(options["tunegrids"]) if options["tunegrids"] else {}

    max_features = None
    if options["max_features"]:
        max_features = int(options["max_features"])

    training_sample_size = opt("training_sample_size")
    tuning_sample_size = opt("tuning_sample_size")
    output_model_file = opt_path("output_model_file")
    input_model_file = opt_path("input_model_file")
    classification_results = opt_path("classification_results")
    probabilities = flags["p"]
    model_details = opt_path("model_details")
    raster_segments_map = opt("raster_segments_map")
    classified_map = opt("classified_map")
    r_script_file = opt("r_script_file")
    variable_importance_file = opt_path("variable_importance_file")
    accuracy_file = opt_path("accuracy_file")
    bw_plot_file = opt_path("bw_plot_file")

    # Export attribute tables to text files when the inputs are vector maps.
    if allmap:
        feature_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command(
            "v.db.select",
            map_=allfeatures,
            file_=feature_vars,
            layer=segments_layer,
            quiet=True,
            overwrite=True,
        )
    else:
        feature_vars = allfeatures.replace("\\", "/")

    if trainmap:
        training_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command(
            "v.db.select",
            map_=training,
            file_=training_vars,
            layer=training_layer,
            quiet=True,
            overwrite=True,
        )
    else:
        training_vars = training.replace("\\", "/")

    r_commands = gscript.tempfile().replace("\\", "/")

    # The context manager guarantees the script is flushed and closed even
    # if building it fails half-way.
    with open(r_commands, "w") as r_file:

        def emit(line=""):
            """Write one line of R code followed by a newline."""
            r_file.write(line + "\n")

        if processes > 1:
            emit(install_package % ("doParallel", "doParallel", "doParallel"))

        # automatic installation of missing R packages
        for package in ("caret", "e1071", "data.table"):
            emit(install_package % (package, package, package))
        for classifier in classifiers:
            # Classifiers without an entry in `packages` need nothing extra.
            for package in packages.get(classifier, []):
                emit(install_package % (package, package, package))
        emit()

        emit("library(caret)")
        emit("library(data.table)")
        if processes > 1:
            emit("library(doParallel)")
            emit("registerDoParallel(cores = %d)" % processes)

        if not flags["t"]:
            emit(
                "features <- data.frame(fread('%s', sep='%s', header=TRUE, blank.lines.skip=TRUE, showProgress=FALSE), row.names=1)"
                % (feature_vars, separator)
            )
            if classcol:
                emit(
                    "if('%s' %%in%% names(features)) {features <- subset(features, select=-%s)}"
                    % (classcol, classcol)
                )

        if input_model_file:
            emit("finalModels <- readRDS('%s')" % input_model_file)
            for classifier in classifiers:
                # .get(): a classifier unknown to `packages` must not raise
                # KeyError here; the install loop above tolerates it as well.
                for package in packages.get(classifier, []):
                    emit("library(%s)" % package)
        else:
            emit(
                "training <- data.frame(fread('%s', sep='%s', header=TRUE, blank.lines.skip=TRUE, showProgress=FALSE), row.names=1)"
                % (training_vars, separator)
            )
            # We have to make sure that class variable values start with a
            # letter as they will be used as variables in the probabilities
            # calculation
            emit("origclassnames <- training$%s" % classcol)
            emit(
                "training$%s <- as.factor(paste('class', training$%s, sep='_'))"
                % (classcol, classcol)
            )
            if tuning_sample_size:
                emit(
                    "rndid <- with(training, ave(training[,1], %s, FUN=function(x) {sample.int(length(x))}))"
                    % classcol
                )
                emit("tuning_data <- training[rndid<=%s,]" % tuning_sample_size)
            else:
                emit("tuning_data <- training")

            # If a max_features value is set, then proceed to feature
            # selection. Currently, feature selection uses random forest.
            # TODO: specific feature selection for each classifier.
            if max_features:
                emit(
                    "RfeControl <- rfeControl(functions=rfFuncs, method='cv', number=10, returnResamp = 'all')"
                )
                emit(
                    "RfeResults <- rfe(subset(tuning_data, select=-%s), tuning_data$%s, sizes=c(1:%i), rfeControl=RfeControl)"
                    % (classcol, classcol, max_features)
                )
                emit("if(length(predictors(RfeResults))>%s)" % max_features)
                emit(
                    "{if((RfeResults$results$Accuracy[%s+1] - RfeResults$results$Accuracy[%s])/RfeResults$results$Accuracy[%s] < 0.03)"
                    % (max_features, max_features, max_features)
                )
                emit(
                    "{RfeUpdate <- update(RfeResults, subset(tuning_data, select=-%s), tuning_data$%s, size=%s)"
                    % (classcol, classcol, max_features)
                )
                emit("bestPredictors <- RfeUpdate$bestVar}}" + " else {")
                emit("bestPredictors <- predictors(RfeResults)}")
                emit("tuning_data <- tuning_data[,c('%s', bestPredictors)]" % classcol)
                emit("training <- training[,c('%s', bestPredictors)]" % classcol)
                if not flags["t"]:
                    emit("features <- features[,bestPredictors]")

            if probabilities:
                emit(
                    "MyControl.cv <- trainControl(method='repeatedcv', number=%s, repeats=%s, classProbs=TRUE, sampling='down')"
                    % (folds, partitions)
                )
            else:
                emit(
                    "MyControl.cv <- trainControl(method='repeatedcv', number=%s, repeats=%s, sampling='down')"
                    % (folds, partitions)
                )
            emit("fmla <- %s ~ ." % classcol)
            emit("models.cv <- list()")
            emit("finalModels <- list()")
            emit("variableImportance <- list()")
            if training_sample_size:
                emit(
                    "rndid <- with(training, ave(training[,2], %s, FUN=function(x) {sample.int(length(x))}))"
                    % classcol
                )
                emit("training_data <- training[rndid<=%s,]" % training_sample_size)
            else:
                emit("training_data <- training")

            for classifier in classifiers:
                # Tuning run: either a user-supplied grid or caret's default
                # grid of `tunelength` values per parameter.
                if classifier in tunegrids:
                    emit("Grid <- expand.grid(%s)" % tunegrids[classifier])
                    tuning_call = (
                        "%sModel.cv <- train(fmla, tuning_data, method='%s', trControl=MyControl.cv, tuneGrid=Grid"
                        % (classifier, classifier)
                    )
                else:
                    tuning_call = (
                        "%sModel.cv <- train(fmla, tuning_data, method='%s', trControl=MyControl.cv, tuneLength=%s"
                        % (classifier, classifier, tunelength)
                    )
                if flags["n"]:
                    tuning_call += ", preprocess=c('center', 'scale')"
                emit(tuning_call + ")")
                emit("models.cv$%s <- %sModel.cv" % (classifier, classifier))

                # Final model: trained once with the best tuning parameters.
                emit("finalControl <- trainControl(method = 'none', classProbs = TRUE)")
                final_call = (
                    "finalModel <- train(fmla, training_data, method='%s', trControl=finalControl, tuneGrid=%sModel.cv$bestTune"
                    % (classifier, classifier)
                )
                if flags["n"]:
                    final_call += ", preprocess=c('center', 'scale')"
                emit(final_call + ")")
                emit("finalModels$%s <- finalModel" % classifier)
                emit("variableImportance$%s <- varImp(finalModel)" % classifier)

            if len(classifiers) > 1:
                emit("resamps.cv <- resamples(models.cv)")
                emit(
                    "accuracy_means <- as.vector(apply(resamps.cv$values[seq(2,length(resamps.cv$values), by=2)], 2, mean))"
                )
                emit(
                    "kappa_means <- as.vector(apply(resamps.cv$values[seq(3,length(resamps.cv$values), by=2)], 2, mean))"
                )
            else:
                emit("resamps.cv <- models.cv[[1]]$resample")
                emit("accuracy_means <- mean(resamps.cv$Accuracy)")
                emit("kappa_means <- mean(resamps.cv$Kappa)")

            if output_model_file:
                emit("saveRDS(finalModels, '%s')" % (output_model_file))

        if not flags["t"]:
            emit("predicted <- data.frame(predict(finalModels, features))")
            # Now erase the 'class_' prefix again in order to get original
            # class values
            emit(
                "predicted <- data.frame(sapply(predicted, function (x) {gsub('class_', '', x)}))"
            )
            if probabilities:
                emit(
                    "probabilities <- data.frame(predict(finalModels, features, type='prob'))"
                )
                emit(
                    "colnames(probabilities) <- gsub('.c', '_prob_c', colnames(probabilities))"
                )
            emit("ids <- rownames(features)")
            # We try to liberate memory space as soon as possible, so erasing
            # non necessary data
            emit("rm(features)")
            if flags["i"] or len(classifiers) == 1:
                emit("resultsdf <- data.frame(id=ids, predicted)")
            else:
                emit("resultsdf <- data.frame(id=ids)")

            if len(classifiers) > 1:
                emit(voting_function)
                if weighting_metric == "kappa":
                    emit("weighting_base <- kappa_means")
                else:
                    emit("weighting_base <- accuracy_means")
                for weighting_mode in weighting_modes:
                    emit(weighting_functions[weighting_mode])
                    emit("weights <- weights / sum(weights)")
                    emit("vote <- apply(predicted, 1, voting, w=weights)")
                    emit(
                        "vote <- as.data.frame(matrix(unlist(vote), ncol=2, byrow=TRUE))"
                    )
                    emit("resultsdf$%s_%s <- vote$V1" % (output_classcol, weighting_mode))
                    emit("resultsdf$%s_%s <- vote$V2" % (output_probcol, weighting_mode))

            emit("rm(predicted)")

            if allmap and not flags["f"]:
                model_output = gscript.tempfile().replace("\\", "/")
                model_output_csv = model_output + ".csv"
                write_string = "write.csv(resultsdf, '%s'," % model_output_csv
                write_string += " row.names=FALSE, quote=FALSE)"
                emit(write_string)

            if classified_map:
                # One r.reclass rules file per output classification.
                reclass_files = {}
                if len(classifiers) > 1:
                    if flags["i"]:
                        for classifier in classifiers:
                            tmpfilename = gscript.tempfile()
                            reclass_files[classifier] = tmpfilename.replace("\\", "/")
                            emit(
                                "tempdf <- data.frame(resultsdf$id, resultsdf$%s)"
                                % (classifier)
                            )
                            emit(
                                "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                            )
                            emit(
                                "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                                % reclass_files[classifier]
                            )
                    for weighting_mode in weighting_modes:
                        tmpfilename = gscript.tempfile()
                        reclass_files[weighting_mode] = tmpfilename.replace("\\", "/")
                        emit(
                            "tempdf <- data.frame(resultsdf$id, resultsdf$%s_%s)"
                            % (output_classcol, weighting_mode)
                        )
                        emit(
                            "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                        )
                        emit(
                            "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                            % reclass_files[weighting_mode]
                        )
                else:
                    tmpfilename = gscript.tempfile()
                    reclass_files[classifiers[0]] = tmpfilename.replace("\\", "/")
                    emit(
                        "reclass <- data.frame(out=apply(resultsdf, 1, function(x) paste(x[1],'=', x[2])))"
                    )
                    emit(
                        "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                        % reclass_files[classifiers[0]]
                    )

            if classification_results:
                if probabilities:
                    emit("resultsdf <- cbind(resultsdf, probabilities)")
                    emit("rm(probabilities)")
                emit(
                    "write.csv(resultsdf, '%s', row.names=FALSE, quote=FALSE)"
                    % classification_results
                )

            emit("rm(resultsdf)")
            emit()

        if accuracy_file:
            emit(
                "df_means <- data.frame(method=names(models.cv),accuracy=accuracy_means, kappa=kappa_means)"
            )
            emit(
                "write.csv(df_means, '%s', row.names=FALSE, quote=FALSE)" % accuracy_file
            )

        if variable_importance_file:
            emit("sink('%s')" % variable_importance_file)
            for classifier in classifiers:
                emit("cat('Classifier: %s')" % classifier)
                emit("cat('******************************')")
                # NOTE(review): the importance table below is always read
                # from the 'rf' entry, whatever the current classifier is.
                # Looks unintended -- confirm before changing the output.
                emit(
                    "variableImportance$rf$importance[order(variableImportance$rf$importance$Overall, decreasing=TRUE),, drop=FALSE]"
                )
            emit("sink()")

        if model_details:
            emit("sink('%s')" % model_details)
            emit("cat('BEST TUNING VALUES')")
            emit("cat('******************************')")
            emit()
            emit("lapply(models.cv, function(x) x$best)")
            emit("cat('\n\n')")
            emit("cat('SUMMARY OF RESAMPLING RESULTS')")
            emit("cat('******************************')")
            emit("cat('\n\n')")
            emit("summary(resamps.cv)")
            emit("cat('\n')")
            emit("cat('\nRESAMPLED CONFUSION MATRICES')")
            emit("cat('******************************')")
            emit("cat('\n\n')")
            emit("conf.mat.cv <- lapply(models.cv, function(x) confusionMatrix(x))")
            emit("print(conf.mat.cv)")
            emit("cat('DETAILED CV RESULTS')")
            emit("cat('\n\n')")
            emit("cat('******************************')")
            emit("cat('\n\n')")
            emit("lapply(models.cv, function(x) x$results)")
            emit("sink()")

        if bw_plot_file and len(classifiers) > 1:
            emit("png('%s.png')" % bw_plot_file)
            emit("print(bwplot(resamps.cv))")
            emit("dev.off()")

    if r_script_file:
        shutil.copy(r_commands, r_script_file)

    gscript.message("Running R now. Following output is R output.")
    try:
        subprocess.check_call(
            ["Rscript", r_commands],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError:
        gscript.fatal(
            "There was an error in the execution of the R script.\nPlease check the R output."
        )

    gscript.message("Finished running R.")

    # The results CSV is only written by the prediction part of the R script,
    # so there is nothing to import when only training was requested (-t).
    if allmap and not flags["f"] and not flags["t"]:
        model_output_csvt = model_output + ".csvt"
        temptable = "classif_tmp_table_%d" % os.getpid()

        # Column types for OGR, mirroring how `resultsdf` is assembled in R:
        # id, then one column per classifier (only when individual results
        # are kept or there is a single classifier), then a class/probability
        # pair per weighting mode.
        column_types = ['"Integer"']
        if flags["i"] or len(classifiers) == 1:
            column_types += ['"Integer"'] * len(classifiers)
        if len(classifiers) > 1:
            column_types += ['"Integer"', '"Real"'] * len(weighting_modes)
        with open(model_output_csvt, "w") as f:
            f.write(",".join(column_types))

        gscript.message("Loading results into attribute table")
        gscript.run_command(
            "db.in.ogr",
            input_=model_output_csv,
            output=temptable,
            overwrite=True,
            quiet=True,
        )
        index_creation = "CREATE INDEX idx_%s_cat" % temptable
        index_creation += " ON %s (id)" % temptable
        gscript.run_command("db.execute", sql=index_creation, quiet=True)
        # Skip the first column (the id) when choosing the columns to join.
        columns = gscript.read_command("db.columns", table=temptable).splitlines()[1:]
        orig_cat = gscript.vector_db(allfeatures)[int(segments_layer)]["key"]
        gscript.run_command(
            "v.db.join",
            map_=allfeatures,
            column=orig_cat,
            otable=temptable,
            ocolumn="id",
            subset_columns=columns,
            quiet=True,
        )

    # `reclass_files` stays None when no prediction was scripted (-t).
    if classified_map and reclass_files:
        for classification, reclass_file in reclass_files.items():
            output_map = classified_map + "_" + classification
            gscript.run_command(
                "r.reclass",
                input=raster_segments_map,
                output=output_map,
                rules=reclass_file,
                quiet=True,
            )
def main():
    """List registered space time datasets or time stamped maps per mapset.

    For every requested temporal type the matching datasets are fetched from
    the temporal database and printed mapset by mapset, one row per line
    with the columns joined by the chosen separator.  Rows go to the file
    given by the ``output`` option, or to standard output when it is empty.
    With the ``c`` flag a single header line with the column names precedes
    the first row.  Informational headings are written to stderr only when
    printing to standard output and the verbosity is above zero.
    """
    # lazy imports
    import grass.temporal as tgis

    # Get the options
    dataset_type = options["type"]
    temporal_types = options["temporaltype"].split(",")
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gscript.separator(options["separator"])
    outpath = options["output"]
    colhead = flags['c']

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.dataset_factory(dataset_type, None)
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    header_pending = True

    if gscript.verbosity() > 0 and not outpath:
        sys.stderr.write("----------------------------------------------\n")

    if outpath:
        outfile = open(outpath, 'w')

    def write_line(text):
        """Send one finished line to the output file or to stdout."""
        if outpath:
            outfile.write("{st}\n".format(st=text))
        else:
            print(text)

    for ttype in temporal_types:
        time_label = "absolute time" if ttype == "absolute" else "relative time"

        stds_list = tgis.get_dataset_list(dataset_type, ttype, columns, where,
                                          order, dbif=dbif)

        # Use the correct order of the mapsets, hence first the current
        # mapset, then alphabetic ordering
        mapsets = tgis.get_tgis_c_library_interface().available_mapsets()

        # Print for each mapset separately
        for mapset in mapsets:
            if mapset not in stds_list.keys():
                continue
            rows = stds_list[mapset]
            if not rows:
                continue

            if gscript.verbosity() > 0 and not outpath:
                if issubclass(sp.__class__, tgis.AbstractMapDataset):
                    sys.stderr.write(
                        _("Time stamped %s maps with %s available in mapset <%s>:\n")
                        % (sp.get_type(), time_label, mapset))
                else:
                    sys.stderr.write(
                        _("Space time %s datasets with %s available in mapset <%s>:\n")
                        % (sp.get_new_map_instance(None).get_type(), time_label, mapset))

            # Print the column names if requested
            if colhead and header_pending:
                write_line(separator.join(str(name) for name in rows[0].keys()))
                header_pending = False

            for row in rows:
                write_line(separator.join(str(col) for col in row))

    if outpath:
        outfile.close()
    dbif.close()
def main():
    """List registered space time datasets or time stamped maps per mapset.

    For every requested temporal type the matching datasets are fetched from
    the temporal database and printed mapset by mapset, one row per line
    with the columns joined by the chosen separator.  Rows go to the file
    given by the ``output`` option, or to standard output when it is empty.
    With the ``c`` flag a single header line with the column names precedes
    the first row.

    The output file is opened once, before iterating over the temporal
    types, so rows of an earlier type are not truncated by a later one.  The
    file and the database connection are released even when listing fails.
    """
    # Get the options
    dataset_type = options["type"]
    temporal_type = options["temporaltype"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gscript.separator(options["separator"])
    outpath = options["output"]
    colhead = flags['c']

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.dataset_factory(dataset_type, None)
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    first = True

    if gscript.verbosity() > 0 and not outpath:
        sys.stderr.write("----------------------------------------------\n")

    outfile = None

    def write_line(text):
        """Send one finished line to the output file or to stdout."""
        if outfile is not None:
            outfile.write("{st}\n".format(st=text))
        else:
            # Parenthesised single argument works on Python 2 and 3 alike.
            print(text)

    try:
        if outpath:
            outfile = open(outpath, 'w')

        for ttype in temporal_type.split(","):
            if ttype == "absolute":
                time = "absolute time"
            else:
                time = "relative time"

            stds_list = tgis.get_dataset_list(dataset_type, ttype, columns,
                                              where, order, dbif=dbif)

            # Use the correct order of the mapsets, hence first the current
            # mapset, then alphabetic ordering
            mapsets = tgis.get_tgis_c_library_interface().available_mapsets()

            # Print for each mapset separately
            for key in mapsets:
                if key not in stds_list.keys():
                    continue
                rows = stds_list[key]
                if not rows:
                    continue

                if gscript.verbosity() > 0 and not outpath:
                    if issubclass(sp.__class__, tgis.AbstractMapDataset):
                        sys.stderr.write(
                            _("Time stamped %s maps with %s available in mapset <%s>:\n")
                            % (sp.get_type(), time, key))
                    else:
                        sys.stderr.write(
                            _("Space time %s datasets with %s available in mapset <%s>:\n")
                            % (sp.get_new_map_instance(None).get_type(), time, key))

                # Print the column names if requested
                if colhead and first:
                    write_line(separator.join(str(name) for name in rows[0].keys()))
                    first = False

                for row in rows:
                    write_line(separator.join(str(col) for col in row))
    finally:
        if outfile is not None:
            outfile.close()
        dbif.close()
def test_backslash_separators():
    """Separators written as backslash escape sequences are evaluated."""
    # (raw specification as typed by the user, character it must resolve to)
    expectations = (
        (r"\t", "\t"),
        (r"\n", "\n"),
    )
    for specification, character in expectations:
        assert gs.separator(specification) == character
def main():
    """Clean a delimited text file and write it out as a plain CSV file.

    The first row is treated as the header: dates are optionally reformatted,
    empty column names are replaced using the ``missing_names`` option,
    whitespace is minimized and every name is made SQL compliant (using
    ``prefix`` as the fallback prefix).  In the remaining rows each cell is
    cleaned according to the ``cell_clean`` option, and rows without any
    content are dropped.  The output is always comma separated.

    Calls ``gs.fatal`` when ``prefix`` does not start with an ASCII letter.
    """
    options, flags = gs.parser()

    in_filename = options["input"]
    out_filename = options["output"]
    input_separator = gs.separator(options["separator"])
    prefix = options["prefix"]
    # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
    date_formats = None
    if options["recognized_date"]:
        date_formats = options["recognized_date"].split(",")
    out_date_format = options["clean_date"]
    missing_names = options["missing_names"].split(",")
    # Read once; tested by substring below for every cell.
    cell_clean = options["cell_clean"]

    # TODO: lowercase the column names

    if prefix and re.match("[^A-Za-z]", prefix[0]):
        # gs.fatal() takes only the message, so the placeholder has to be
        # filled in here rather than passed along as a keyword argument.
        gs.fatal(
            _(
                "Prefix (now <{prefix}>) must start with an ASCII letter (a-z or A-Z in English alphabeth)"
            ).format(prefix=prefix)
        )

    with open(in_filename, "r", newline="") as infile, open(
        out_filename, "w", newline=""
    ) as outfile:
        # TODO: Input format to parameters (important)
        # TODO: Output format to parameters (somewhat less important)
        input_csv = csv.reader(infile, delimiter=input_separator, quotechar='"')
        output_csv = csv.writer(
            outfile, delimiter=",", quotechar='"', lineterminator="\n"
        )
        for i, row in enumerate(input_csv):
            # TODO: Optionally remove newlines from cells.
            # In header and body replace by space (and turns into underscore
            # for header).
            if i == 0:
                new_row = []
                num_unnamed_columns = 0
                duplicated_number = 2  # starting at two for duplicated names
                for column_number, column in enumerate(row):
                    if date_formats:
                        column = reformat_date(date_formats, out_date_format, column)
                    if not column:
                        if not num_unnamed_columns:
                            # First unnamed column gets the first name as is.
                            column = missing_names[0]
                        elif len(missing_names) == 1:
                            # Single name: disambiguate by column position.
                            column = f"{missing_names[0]}_{column_number + 1}"
                        elif num_unnamed_columns < len(missing_names):
                            column = missing_names[num_unnamed_columns]
                        else:
                            # Names exhausted: reuse the last one with a
                            # running number.
                            column = f"{missing_names[-1]}_{duplicated_number}"
                            duplicated_number += 1
                        num_unnamed_columns += 1
                    column = minimize_whitespace(column)
                    # TODO: Also duplicate column names should be resolved here.
                    # Perhaps just move the else of no column names here or
                    # perhaps not because it would be difficult to navigate
                    # the code.
                    column = make_name_sql_compliant(column, fallback_prefix=prefix)
                    new_row.append(column)
            else:
                # TODO: Optionally reformat dates in the body too (but without prefix).
                # TODO: Recognize numbers with spaces and commas and fix them.
                # For example, 10,000 and 10 000,5 should/might be
                # 10000 (or 10.0) 10000.5.
                # TODO: General find and replace for cells (which could take
                # care of some escape chars or other mess. Question is how to
                # make it general/more than one replace pair.
                # (Remove would be easier to have in the interface.)
                new_row = []
                row_has_content = False
                for column in row:
                    if column:
                        row_has_content = True
                    # TODO: Use bools for this, perhaps a dedicated class for
                    # this type of option.
                    # This is an experiment with extremely aggressive
                    # replacement of flags by options.
                    if "collapse_whitespace" in cell_clean:
                        column = collapse_whitespace(column)
                    if "strip_whitespace" in cell_clean:
                        column = column.strip()
                    if date_formats and "date_format" in cell_clean:
                        column = reformat_date(date_formats, out_date_format, column)
                    new_row.append(column)
                # Skips completely empty rows and rows with only separators.
                if not row_has_content:
                    continue
            # TODO: Add except csv.Error as error:
            output_csv.writerow(new_row)
def main():
    """Compute per-zone class statistics of a categorical raster.

    ``r.stats`` is run on the zone map and the class raster to count the
    cells of every class inside every zone.  Depending on the ``statistics``
    option, the modal class and/or the proportion of each class are derived
    per zone and written to a CSV file and/or to the attribute table of a
    vector map created from the zone map.

    Module-level globals are assigned so that code outside this function can
    see the inputs and the temporary files and maps that were created.

    Calls ``gscript.fatal`` when an input map is not of type CELL, when
    ``decimals`` is outside 1..100, when ``r.stats`` fails, or when the
    temporary table already exists and overwriting is not allowed.
    """
    global insert_sql
    global temporary_vect
    global stats_temp_file
    global content
    global raster
    global decimals
    global zone_map

    insert_sql = None
    temporary_vect = None
    stats_temp_file = None
    content = None
    raster = options['raster']
    decimals = int(options['decimals'])
    zone_map = options['zone_map']

    csvfile = options['csvfile'] if options['csvfile'] else []
    separator = gscript.separator(options['separator'])
    prefix = options['prefix'] if options['prefix'] else []
    classes_list = options['classes_list'].split(
        ',') if options['classes_list'] else []
    vectormap = options['vectormap'] if options['vectormap'] else []
    statistics = options['statistics'].split(',')
    want_prop = 'proportion' in statistics
    want_mode = 'mode' in statistics

    # Check if input layer is CELL
    if gscript.parse_command('r.info', flags='g',
                             map=raster)['datatype'] != 'CELL':
        gscript.fatal(
            _("The type of the input map 'raster' is not CELL. Please use raster with integer values"
              ))
    if gscript.parse_command('r.info', flags='g',
                             map=zone_map)['datatype'] != 'CELL':
        gscript.fatal(
            _("The type of the input map 'zone_map' is not CELL. Please use raster with integer values"
              ))

    # Check if 'decimals' is + and with credible value
    if decimals <= 0:
        gscript.fatal(_("The number of decimals should be positive"))
    if decimals > 100:
        gscript.fatal(_("The number of decimals should not be more than 100"))

    # Adjust region to input map is flag active
    if flags['r']:
        gscript.use_temp_region()
        gscript.run_command('g.region', raster=zone_map)

    # R.STATS
    tmpfile = gscript.tempfile()
    # With the 'n' flag of this module null values are counted as well;
    # otherwise r.stats is told to skip them with its own 'n' flag.
    rstats_flags = 'c' if flags['n'] else 'cn'
    try:
        gscript.run_command('r.stats',
                            overwrite=True,
                            flags=rstats_flags,
                            input='%s,%s' % (zone_map, raster),
                            output=tmpfile,
                            separator=separator)
    except Exception:
        # Not a bare except: interpreter exits and Ctrl-C must pass through.
        gscript.fatal(_("The execution of r.stats failed"))
    else:
        gscript.message(_("r.stats command finished..."))

    # COMPUTE STATISTICS
    # Total pixels per category per zone: {zone ID: {class: cell count}}
    totals_dict = {}
    with open(tmpfile, 'r') as rstatsfile:
        for row in csv.reader(rstatsfile, delimiter=separator):
            totals_dict.setdefault(row[0], {})[row[1]] = int(row[2])
    # Delete key '*' in 'totals_dict' that could appear if there are null
    # values on the zone raster
    totals_dict.pop('*', None)

    # Mode
    if want_mode:
        modalclass_dict = {}
        for ID in totals_dict:
            # The trick was found here : https://stackoverflow.com/a/268285/8013239
            # Kept in its own name so that the boolean request flag is not
            # overwritten by a class label.
            modal_class = max(totals_dict[ID].items(),
                              key=operator.itemgetter(1))[0]
            # '*' is how r.stats reports NULL cells
            modalclass_dict[ID] = 'NULL' if modal_class == '*' else modal_class

    # Classes proportions
    if want_prop:
        # Get list of categories to output; classes given by the user are
        # always present, stored as strings like the r.stats output.
        class_dict = {str(a): '' for a in classes_list}
        # Proportions are percentages with the 'p' flag, fractions otherwise
        scale = 100 if flags['p'] else 1
        # Proportion of each category per zone
        proportion_dict = {}
        for ID in totals_dict:
            zone_total = sum(totals_dict[ID].values())
            proportion_dict[ID] = {}
            for cl, count in totals_dict[ID].items():
                # Store NULL cells under the same 'NULL' label that is used
                # for the output column, otherwise their proportion is lost
                # and reported as zero.
                label = 'NULL' if cl == '*' else cl
                proportion_dict[ID][label] = round(
                    float(count) / zone_total * scale, decimals)
                class_dict[label] = ''
        # Fill class not met in the raster with zero
        zero = '{:.{}f}'.format(0, decimals)
        for ID in proportion_dict:
            for cl in class_dict:
                proportion_dict[ID].setdefault(cl, zero)
        # Get list of class sorted by value (arithmetic), NULL last
        class_list = sorted(int(k) for k in class_dict if k != 'NULL')
        if 'NULL' in class_dict:
            class_list.append('NULL')
    gscript.verbose(_("Statistics computed..."))

    # OUTPUT CONTENT
    # Header
    header = ['cat']
    if want_mode:
        header.append('%s_mode' % prefix if prefix else 'mode')
    if want_prop:
        for cl in class_list:
            if prefix:
                header.append('%s_prop_%s' % (prefix, cl))
            else:
                header.append('prop_%s' % cl)
    # Values, in the same order as the header columns after 'cat'
    value_dict = {}
    for ID in totals_dict:
        values = []
        if want_mode:
            values.append(modalclass_dict[ID])
        if want_prop:
            values.extend(proportion_dict[ID]['%s' % cl] for cl in class_list)
        value_dict[ID] = values

    # WRITE OUTPUT
    if csvfile:
        with open(csvfile, 'w') as outfile:
            writer = csv.writer(outfile, delimiter=separator)
            writer.writerow(header)
            writer.writerows([ID] + values
                             for ID, values in value_dict.items())
    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = 'rzonalclasses_tmp_vect_%d' % os.getpid()
        gscript.run_command('r.to.vect',
                            input_=zone_map,
                            output=temporary_vect,
                            type_='area',
                            flags='vt',
                            overwrite=True,
                            quiet=True)
        insert_sql = gscript.tempfile()
        with open(insert_sql, 'w') as fsql:
            fsql.write('BEGIN TRANSACTION;\n')
            if gscript.db_table_exist(temporary_vect):
                if gscript.overwrite():
                    fsql.write('DROP TABLE %s;' % temporary_vect)
                else:
                    gscript.fatal(
                        _("Table %s already exists. Use --o to overwrite") %
                        temporary_vect)
            create_statement = 'CREATE TABLE ' + temporary_vect + ' (cat int PRIMARY KEY);\n'
            fsql.write(create_statement)
            for col in header[1:]:
                if col.split('_')[-1] == 'mode':
                    # Mode column should be integer
                    addcol_statement = 'ALTER TABLE %s ADD COLUMN %s integer;\n' % (
                        temporary_vect, col)
                else:
                    # Proportions column should be double precision
                    addcol_statement = 'ALTER TABLE %s ADD COLUMN %s double precision;\n' % (
                        temporary_vect, col)
                fsql.write(addcol_statement)
            for key in value_dict:
                insert_statement = 'INSERT INTO %s VALUES (%s, %s);\n' % (
                    temporary_vect, key, ','.join(
                        [str(x) for x in value_dict[key]]))
                fsql.write(insert_statement)
            fsql.write('END TRANSACTION;')
        gscript.run_command('db.execute', input=insert_sql, quiet=True)
        gscript.run_command('v.db.connect',
                            map_=temporary_vect,
                            table=temporary_vect,
                            quiet=True)
        gscript.run_command('g.copy',
                            vector='%s,%s' % (temporary_vect, vectormap),
                            quiet=True)
def main():
    """Compute per-segment statistics and export them as CSV and/or vector map.

    Reads the module-level ``options`` and ``flags``. Depending on them it
    collects, for every segment of ``options["map"]``:

    * geometry measures produced by ``r.object.geometry``,
    * per-raster statistics written by ``worker`` (run in a process pool),
    * mean/stddev of all of the above over each segment's neighbors, based
      on ``r.neighborhoodmatrix``.

    Segments for which not every requested value could be collected are left
    out of the outputs and listed in a final message.

    Side effects: sets the globals ``insert_sql``, ``temporary_vect``,
    ``stats_temp_file`` and ``rasters``.
    """
    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None

    segment_map = options["map"]
    csvfile = options["csvfile"] if options["csvfile"] else []
    vectormap = options["vectormap"] if options["vectormap"] else []
    global rasters
    rasters = options["rasters"].split(",") if options["rasters"] else []
    area_measures = (
        options["area_measures"].split(",")
        if (options["area_measures"] and not flags["s"])
        else []
    )
    neighborhood = bool(flags["n"])
    if neighborhood:
        if not gscript.find_program("r.neighborhoodmatrix", "--help"):
            message = _("You need to install the addon r.neighborhoodmatrix to be able")
            # The addon is needed for the -n flag, not for area measures.
            message += _(" to calculate neighborhood statistics.\n")
            message += _(
                " You can install the addon with 'g.extension r.neighborhoodmatrix'"
            )
            gscript.fatal(message)

    raster_statistics = (
        options["raster_statistics"].split(",") if options["raster_statistics"] else []
    )
    separator = gscript.separator(options["separator"])
    processes = int(options["processes"])

    output_header = ["cat"]
    # segment id -> list of value strings, in the order of output_header[1:]
    output_dict = collections.defaultdict(list)

    # Column index of each statistic in the '|'-separated per-raster files.
    raster_stat_dict = {
        "zone": 0,
        "min": 4,
        "third_quart": 16,
        "max": 5,
        "sum": 12,
        "null_cells": 3,
        "median": 15,
        "label": 1,
        "first_quart": 14,
        "range": 6,
        "mean_of_abs": 8,
        "stddev": 9,
        "non_null_cells": 2,
        "coeff_var": 11,
        "variance": 10,
        "sum_abs": 13,
        "perc_90": 17,
        "mean": 7,
    }

    # Column index of each measure in the r.object.geometry output.
    geometry_stat_dict = {
        "cat": 0,
        "area": 1,
        "perimeter": 2,
        "compact_square": 3,
        "compact_circle": 4,
        "fd": 5,
        "xcoords": 6,
        "ycoords": 7,
    }

    if flags["r"]:
        gscript.use_temp_region()
        gscript.run_command("g.region", raster=segment_map)

    stats_temp_file = gscript.tempfile()
    if area_measures:
        gscript.message(_("Calculating geometry statistics..."))
        output_header += area_measures
        stat_indices = [geometry_stat_dict[x] for x in area_measures]
        gscript.run_command(
            "r.object.geometry",
            input_=segment_map,
            output=stats_temp_file,
            overwrite=True,
            quiet=True,
        )

        with open(stats_temp_file, "r") as fin:
            next(fin, None)  # skip the header line
            for line in fin:
                values = line.rstrip().split("|")
                output_dict[values[0]] = [values[x] for x in stat_indices]

    if rasters:
        if not flags["c"]:
            gscript.message(_("Checking usability of raster maps..."))
            rasters_to_remove = []
            for raster in rasters:
                null_values_found = False
                if not gscript.find_file(raster, element="cell")["name"]:
                    # Interpolate after translation so the msgid matches.
                    gscript.message(_("Cannot find raster '%s'") % raster)
                    gscript.message(_("Removing this raster from list."))
                    rasters_to_remove.append(raster)
                    continue
                current_mapset = gscript.gisenv()["MAPSET"]
                if gscript.find_file("MASK", element="cell", mapset=current_mapset)[
                    "name"
                ]:
                    # With a MASK present, look for nulls inside the mask only.
                    null_test = gscript.read_command(
                        "r.stats", flags="N", input_=["MASK", raster], quiet=True
                    ).splitlines()
                    if "1 *" in null_test:
                        null_values_found = True
                else:
                    raster_info = gscript.parse_command(
                        "r.univar", flags="g", map_=raster, quiet=True
                    )
                    if len(raster_info) == 0 or int(raster_info["null_cells"]) > 0:
                        null_values_found = True
                if null_values_found:
                    message = "Raster <%s> contains null values.\n" % raster
                    message += "This can lead to errors in the calculations.\n"
                    message += "Check region settings and raster extent.\n"
                    message += "Possibly fill null values of raster.\n"
                    message += "Removing this raster from list."
                    gscript.warning(message)
                    rasters_to_remove.append(raster)

            for raster in rasters_to_remove:
                rasters.remove(raster)

        if len(rasters) > 0:
            gscript.message(_("Calculating statistics for the following raster maps:"))
            gscript.message(",".join(rasters))
            if len(rasters) < processes:
                processes = len(rasters)
                gscript.message(
                    _("Only one process per raster. Reduced number of processes to %i.")
                    % processes
                )

            stat_indices = [raster_stat_dict[x] for x in raster_statistics]
            pool = Pool(processes)
            func = partial(worker, segment_map, stats_temp_file)
            pool.map(func, rasters)
            pool.close()
            pool.join()

            for raster in rasters:
                rastername = raster.split("@")[0]
                rastername = rastername.replace(".", "_")
                temp_file = stats_temp_file + "." + rastername
                output_header += [rastername + "_" + x for x in raster_statistics]
                with open(temp_file, "r") as fin:
                    next(fin, None)  # skip the header line
                    for line in fin:
                        values = line.rstrip().split("|")
                        output_dict[values[0]] = output_dict[values[0]] + [
                            values[x] for x in stat_indices
                        ]

    # Calculating neighborhood statistics if requested
    if neighborhood:
        gscript.message(_("Calculating neighborhood statistics..."))

        # Add neighborhood statistics to headers
        original_nb_values = len(output_header) - 1
        new_headers = ["neighbors_count"]
        for i in range(1, len(output_header)):
            new_headers.append("%s_nbrmean" % output_header[i])
            new_headers.append("%s_nbrstddev" % output_header[i])

        output_header += new_headers

        # Get sorted neighborhood matrix (groupby below needs sorted input)
        nbr_matrix = sorted(
            [
                x.split("|")
                for x in gscript.read_command(
                    "r.neighborhoodmatrix", input_=segment_map, flags="d", quiet=True
                ).splitlines()
            ]
        )

        # Calculate mean and stddev of neighbor values for each variable in the
        # output_dict
        for key, group in groupby(nbr_matrix, lambda x: x[0]):
            d = {i: (0, 0, 0) for i in range(original_nb_values)}
            nbrlist = [str(x[1]) for x in group]
            if len(nbrlist) > 1:
                for nbr in nbrlist:
                    for i in range(original_nb_values):
                        d[i] = update(d[i], float(output_dict[nbr][i]))
                output_dict[key] = output_dict[key] + [str(len(nbrlist))]
                output_dict[key] = output_dict[key] + [
                    str(i) for sub in [finalize(x) for x in d.values()] for i in sub
                ]
            else:
                # Single neighbor: its value is the mean, stddev is zero.
                newvalues = ["1"]
                nbr = nbrlist[0]
                for i in range(original_nb_values):
                    newvalues.append(output_dict[nbr][i])
                    newvalues.append("0")
                output_dict[key] = output_dict[key] + newvalues

    message = _("Some values could not be calculated for the objects below. ")
    message += _("These objects are thus not included in the results. ")
    message += _("HINT: Check some of the raster maps for null values ")
    message += _("and possibly fill these values with r.fillnulls.")
    error_objects = []

    if csvfile:
        with open(csvfile, "w") as f:
            f.write(separator.join(output_header) + "\n")
            for key in output_dict:
                # Only complete rows are written; the rest are reported.
                if len(output_dict[key]) + 1 == len(output_header):
                    f.write(key + separator + separator.join(output_dict[key]) + "\n")
                else:
                    error_objects.append(key)

    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = "segmstat_tmp_vect_%d" % os.getpid()
        gscript.run_command(
            "r.to.vect",
            input_=segment_map,
            output=temporary_vect,
            type_="area",
            flags="vt",
            overwrite=True,
            quiet=True,
        )

        insert_sql = gscript.tempfile()
        # The context manager closes the file even when gscript.fatal() fires.
        with open(insert_sql, "w") as fsql:
            fsql.write("BEGIN TRANSACTION;\n")
            if gscript.db_table_exist(temporary_vect):
                if gscript.overwrite():
                    fsql.write("DROP TABLE %s;" % temporary_vect)
                else:
                    gscript.fatal(
                        _("Table %s already exists. Use --o to overwrite")
                        % temporary_vect
                    )
            create_statement = (
                "CREATE TABLE " + temporary_vect + " (cat int PRIMARY KEY);\n"
            )
            fsql.write(create_statement)
            for header in output_header[1:]:
                addcol_statement = "ALTER TABLE %s ADD COLUMN %s double precision;\n" % (
                    temporary_vect,
                    header,
                )
                fsql.write(addcol_statement)
            for key in output_dict:
                if len(output_dict[key]) + 1 == len(output_header):
                    # Map non-finite values to SQL NULL per value, so that
                    # signed ones ("-inf", "-nan") do not end up as "-NULL".
                    sql_values = [
                        "NULL" if value.lstrip("+-") in ("inf", "nan") else value
                        for value in output_dict[key]
                    ]
                    fsql.write(
                        "INSERT INTO %s VALUES (%s, %s);\n"
                        % (temporary_vect, key, ",".join(sql_values))
                    )
                else:
                    # Already collected while writing the CSV file, if any.
                    if not csvfile:
                        error_objects.append(key)

            fsql.write("END TRANSACTION;")

        gscript.run_command("db.execute", input=insert_sql, quiet=True)
        gscript.run_command(
            "v.db.connect", map_=temporary_vect, table=temporary_vect, quiet=True
        )
        gscript.run_command(
            "g.copy", vector="%s,%s" % (temporary_vect, vectormap), quiet=True
        )

    if error_objects:
        object_string = ", ".join(error_objects[:100])
        message += (
            _("\n\nObjects with errors (only first 100 are shown):\n%s")
            % object_string
        )
        gscript.message(message)
def main(options, flags):
    """Aggregate STRDS values at vector features and print or store them.

    For every date (taken from ``date_column`` or from the ``date`` option)
    the space time raster dataset is sampled with ``t.rast.what`` over a time
    window derived from ``granularity`` (or ending at the final date), the
    sampled values of each feature are reduced with every requested method
    via ``return_value`` and the results are either collected as text and
    printed, or written to the attribute table with ``v.db.update``.

    :param options: parsed module options
    :param flags: parsed module flags (``u`` update, ``c`` create, ``a``)
    """
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find("@") != -1:
        invect = invect.split("@")[0]
    incol = options["date_column"]
    indate = options["date"]
    endcol = options["final_date_column"]
    enddate = options["final_date"]
    strds = options["strds"]
    nprocs = options["nprocs"]
    if strds.find("@") != -1:
        strds_name = strds.split("@")[0]
    else:
        strds_name = strds
    output = options["output"]
    if options["columns"]:
        cols = options["columns"].split(",")
    else:
        cols = []
    mets = options["method"].split(",")
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])
    update = flags["u"]
    create = flags["c"]

    stdout = False
    if output != "-" and update:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != "-" and create:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == "-" and (update or create):
        if update and not cols:
            gscript.fatal(_("Please set 'columns' option"))
        output = invect
    else:
        stdout = True

    if create:
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module(
                    "v.db.addcolumn",
                    map=invect,
                    columns="{col} double precision".format(col=colname),
                )
            except CalledModuleError:
                # Format after translating so the msgid matches the catalogue.
                gscript.fatal(
                    _("Not possible to create column {col}").format(col=colname)
                )
        gscript.warning(
            _("Attribute table of vector {name} will be updated...").format(
                name=invect
            )
        )
    elif update:
        colexist = pymod.Module(
            "db.columns", table=invect, stdout_=PI
        ).outputs.stdout.splitlines()
        for col in cols:
            if col not in colexist:
                gscript.fatal(
                    _("Column '{}' does not exist, please create it first").format(
                        col
                    )
                )
        gscript.warning(
            _("Attribute table of vector {name} will be updated...").format(
                name=invect
            )
        )

    if output != "-" and len(cols) != len(mets):
        gscript.fatal(
            _("'columns' and 'method' options must have the same number of elements")
        )
    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

    # Width of the sampling window; stays None when it cannot be derived.
    td = None
    if sp.get_temporal_type() == "absolute":
        if gran:
            delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
            unit = tgis.gran_singular_unit(gran)
            if unit in ["year", "month"]:
                delta = int(tgis.gran_to_gran(gran, "1 day", True))
                td = timedelta(delta)
            elif unit == "day":
                delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
                td = timedelta(delta)
            elif unit == "hour":
                td = timedelta(hours=delta)
            elif unit == "minute":
                td = timedelta(minutes=delta)
            elif unit == "second":
                td = timedelta(seconds=delta)
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(
                _(
                    "Input granularity is smaller or equal to the {iv}"
                    " STRDS granularity"
                ).format(iv=strds)
            )
        td = int(gran)

    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))

    if incol:
        if endcol:
            mysql = "SELECT DISTINCT {dc},{ec} from {vmap} order by {dc}".format(
                vmap=invect, dc=incol, ec=endcol
            )
        else:
            mysql = "SELECT DISTINCT {dc} from {vmap} order by {dc}".format(
                vmap=invect, dc=incol
            )
        try:
            dates = pymod.Module(
                "db.select", flags="c", stdout_=PI, stderr_=PI, sql=mysql
            )
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        # Keep the "start|final" pair when a final date is given; it is
        # split again in the loop below.
        if enddate:
            mydates = ["{ida}|{eda}".format(ida=indate, eda=enddate)]
        else:
            mydates = [indate]
        pymap = VectorTopo(invect)
        pymap.open("r")
        if len(pymap.dblinks) == 0:
            try:
                pymap.close()
                pymod.Module("v.db.addtable", map=invect)
            except CalledModuleError:
                dbif.close()
                # Both placeholders need a value; a single argument would
                # raise TypeError instead of showing the message.
                gscript.fatal(
                    _("Unable to add table <%s> to vector map <%s>")
                    % (invect, invect)
                )
        if pymap.is_open():
            pymap.close()
        qfeat = pymod.Module(
            "v.category", stdout_=PI, stderr_=PI, input=invect, option="print"
        )
        myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ""
    # One "*" per method, used for features without usable values.
    no_data_cells = "".join(
        "{sep}{val}".format(val="*", sep=separator) for _unused in mets
    )
    for data in mydates:
        try:
            start, final = data.split("|")
        except ValueError:
            start = data
            final = None
        if sp.get_temporal_type() == "absolute":
            fdata = datetime.strptime(start, dateformat)
        else:
            fdata = int(start)
        if final:
            sdata = datetime.strptime(final, dateformat)
        elif flags["a"]:
            # Window starts at the date and extends forward.
            sdata = fdata + td
        else:
            # Window ends at the date and extends backward.
            sdata = fdata
            fdata = sdata - td
        mwhere = "start_time >= '{inn}' and start_time < '{out}'".format(
            inn=fdata, out=sdata
        )
        lines = None
        try:
            r_what = pymod.Module(
                "t.rast.what",
                points=invect,
                strds=strds,
                layout="timerow",
                separator=separator,
                flags="v",
                where=mwhere,
                quiet=True,
                stdout_=PI,
                stderr_=PI,
                nprocs=nprocs,
            )
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.warning("t.rast.what faild with where='{}'".format(mwhere))
        if incol:
            if endcol:
                mysql = (
                    "SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                    "AND {ec}='{ed}' order by cat".format(
                        vmap=invect, da=start, dc=incol, ed=final, ec=endcol
                    )
                )
            else:
                mysql = (
                    "SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                    "order by cat".format(vmap=invect, da=start, dc=incol)
                )
            try:
                qfeat = pymod.Module(
                    "db.select", flags="c", stdout_=PI, stderr_=PI, sql=mysql
                )
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(
                    _("db.select returned an error for date {da}").format(da=start)
                )
        if not lines and stdout:
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat, da=start, sep=separator)
                outtxt += no_data_cells
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    nvals = np.array(vals[3:]).astype(float)
                except ValueError:
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(
                            di=vals[0], da=start, sep=separator
                        )
                        outtxt += no_data_cells
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(
                        di=vals[0], da=start, sep=separator
                    )
                for n in range(len(mets)):
                    result = None
                    if len(nvals) == 1:
                        result = nvals[0]
                    elif len(nvals) > 1:
                        result = return_value(nvals, mets[n])
                    if stdout:
                        # Only a missing result is a gap; 0 is a valid value.
                        if result is None:
                            result = "*"
                        outtxt += "{sep}{val}".format(val=result, sep=separator)
                    else:
                        try:
                            if incol:
                                mywhe = "{dc}='{da}' AND ".format(da=start, dc=incol)
                                if endcol:
                                    mywhe += "{dc}='{da}' AND ".format(
                                        da=final, dc=endcol
                                    )
                                mywhe += "cat={ca}".format(ca=vals[0])
                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where=mywhe,
                                )
                            else:
                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where="cat={ca}".format(ca=vals[0]),
                                )
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
def main():
    """Compute modal class and class proportions of a raster per zone.

    Reads the module-level ``options`` and ``flags``, cross-tabulates
    ``zone_map`` and ``raster`` with ``r.stats -c`` and derives, for every
    zone, the class with the highest cell count and/or the share of each
    class. Results are written to a CSV file and/or attached to a vector map
    created from ``zone_map``.

    Side effects: sets the globals ``insert_sql``, ``temporary_vect``,
    ``stats_temp_file``, ``content``, ``raster``, ``decimals`` and
    ``zone_map``.
    """
    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None
    global content
    content = None
    global raster
    raster = options["raster"]
    global decimals
    decimals = int(options["decimals"])
    global zone_map
    zone_map = options["zone_map"]
    csvfile = options["csvfile"] if options["csvfile"] else []
    separator = gscript.separator(options["separator"])
    prefix = options["prefix"] if options["prefix"] else []
    classes_list = options["classes_list"].split(",") if options["classes_list"] else []
    vectormap = options["vectormap"] if options["vectormap"] else []
    statistics = options["statistics"].split(",")
    prop = "proportion" in statistics
    mode = "mode" in statistics

    # Check only if flag activated - Can be bottleneck in case of very large raster.
    if flags["c"]:
        # Check if input layers are CELL
        if gscript.parse_command("r.info", flags="g", map=raster)["datatype"] != "CELL":
            gscript.fatal(
                _(
                    "The type of the input map 'raster' is not CELL. Please use raster with integer values"
                )
            )
        if (
            gscript.parse_command("r.info", flags="g", map=zone_map)["datatype"]
            != "CELL"
        ):
            gscript.fatal(
                _(
                    "The type of the input map 'zone_map' is not CELL. Please use raster with integer values"
                )
            )

    # Check if 'decimals' is + and with credible value
    if decimals <= 0:
        gscript.fatal(_("The number of decimals should be positive"))
    if decimals > 100:
        gscript.fatal(_("The number of decimals should not be more than 100"))

    # Adjust region to input map if flag active
    if flags["r"]:
        gscript.use_temp_region()
        gscript.run_command("g.region", raster=zone_map)

    # R.STATS
    tmpfile = gscript.tempfile()
    # With -n null values are counted too; otherwise r.stats drops them.
    rstats_flags = "c" if flags["n"] else "cn"
    try:
        gscript.run_command(
            "r.stats",
            overwrite=True,
            flags=rstats_flags,
            input="%s,%s" % (zone_map, raster),
            output=tmpfile,
            separator=separator,
        )
        gscript.message(_("r.stats command finished..."))
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        gscript.fatal(_("The execution of r.stats failed"))

    # COMPUTE STATISTICS
    # Total pixels per category per zone: {zone id: {class: cell count}}
    totals_dict = {}
    with open(tmpfile, "r") as rstatsfile:
        reader = csv.reader(rstatsfile, delimiter=separator)
        for row in reader:
            totals_dict.setdefault(row[0], {})[row[1]] = int(row[2])
    # Delete key '*' in 'totals_dict' that could appear if there are null
    # values on the zone raster
    totals_dict.pop("*", None)
    # Get list of ID
    id_list = list(totals_dict)

    # Mode
    if mode:
        modalclass_dict = {}
        for ID in id_list:
            # The trick was found here : https://stackoverflow.com/a/268285/8013239
            # A separate name keeps the boolean 'mode' switch intact.
            modal_class = max(
                totals_dict[ID].items(), key=operator.itemgetter(1)
            )[0]
            # r.stats reports NULL cells as '*'
            modalclass_dict[ID] = "NULL" if modal_class == "*" else modal_class

    # Class proportions
    if prop:
        # Get list of categories to output
        if classes_list:
            # Normalised through int() to be sure it's string format
            class_dict = {str(int(a)): "" for a in classes_list}
        else:
            class_dict = {}
        # Proportion of each category per zone
        proportion_dict = {}
        for ID in id_list:
            proportion_dict[ID] = {}
            zone_total = sum(totals_dict[ID].values())
            for cl, cell_count in totals_dict[ID].items():
                # with flag -l, output will contain only classes from 'classes_list'
                if flags["l"] and cl not in classes_list:
                    continue
                if flags["p"]:
                    prop_value = float(cell_count) / zone_total * 100
                else:
                    prop_value = float(cell_count) / zone_total
                # Store NULL under the same key that is used for the header
                # and the lookup below; keeping it under '*' made the NULL
                # column always read the zero filler.
                class_key = "NULL" if cl == "*" else cl
                proportion_dict[ID][class_key] = "{:.{}f}".format(
                    prop_value, decimals
                )
                class_dict[class_key] = ""
        # Fill class not met in the raster with zero
        zero_value = "{:.{}f}".format(0, decimals)
        for ID in proportion_dict:
            for cl in class_dict:
                proportion_dict[ID].setdefault(cl, zero_value)
        # Get list of class sorted by value (arithmetic ordering)
        class_list = sorted(int(k) for k in class_dict if k != "NULL")
        if "NULL" in class_dict:
            class_list.append("NULL")
    gscript.verbose(_("Statistics computed..."))
    # Set 'totals_dict' to None to try RAM release
    totals_dict = None

    # OUTPUT CONTENT
    # Header
    header = ["cat"]
    if mode:
        header.append("%s_mode" % prefix if prefix else "mode")
    if prop:
        if prefix:
            header.extend("%s_prop_%s" % (prefix, cl) for cl in class_list)
        else:
            header.extend("prop_%s" % cl for cl in class_list)
    # Values: one row of strings per zone, in header order
    value_dict = {}
    for ID in id_list:
        value_dict[ID] = [ID]
        if mode:
            value_dict[ID].append(modalclass_dict[ID])
        if prop:
            for cl in class_list:
                value_dict[ID].append(proportion_dict[ID]["%s" % cl])

    # WRITE OUTPUT
    if csvfile:
        with open(csvfile, "w", newline="") as outfile:
            writer = csv.writer(outfile, delimiter=separator)
            writer.writerow(header)
            writer.writerows(value_dict.values())
    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = "rzonalclasses_tmp_vect_%d" % os.getpid()
        gscript.run_command(
            "r.to.vect",
            input_=zone_map,
            output=temporary_vect,
            type_="area",
            flags="vt",
            overwrite=True,
            quiet=True,
        )
        insert_sql = gscript.tempfile()
        with open(insert_sql, "w", newline="") as fsql:
            fsql.write("BEGIN TRANSACTION;\n")
            if gscript.db_table_exist(temporary_vect):
                if gscript.overwrite():
                    fsql.write("DROP TABLE %s;" % temporary_vect)
                else:
                    gscript.fatal(
                        _("Table %s already exists. Use --o to overwrite")
                        % temporary_vect
                    )
            create_statement = (
                "CREATE TABLE %s (cat int PRIMARY KEY);\n" % temporary_vect
            )
            fsql.write(create_statement)
            for col in header[1:]:
                if col.split("_")[-1] == "mode":
                    # Mode column should be integer
                    addcol_statement = "ALTER TABLE %s ADD COLUMN %s integer;\n" % (
                        temporary_vect,
                        col,
                    )
                else:
                    # Proportions column should be double precision
                    addcol_statement = (
                        "ALTER TABLE %s ADD COLUMN %s double precision;\n"
                        % (temporary_vect, col)
                    )
                fsql.write(addcol_statement)
            for key in value_dict:
                insert_statement = "INSERT INTO %s VALUES (%s);\n" % (
                    temporary_vect,
                    ",".join(value_dict[key]),
                )
                fsql.write(insert_statement)
            fsql.write("END TRANSACTION;")
        gscript.run_command("db.execute", input=insert_sql, quiet=True)
        gscript.run_command(
            "v.db.connect", map_=temporary_vect, table=temporary_vect, quiet=True
        )
        gscript.run_command(
            "g.copy", vector="%s,%s" % (temporary_vect, vectormap), quiet=True
        )
def main():
    """Print attribute rows of all vector maps registered in a STVDS.

    Every registered map is queried with ``v.db.select``; each data row is
    printed prefixed with the map's start and end time. The column header is
    printed first and again whenever it differs from the previous one.
    """
    # lazy imports
    import grass.temporal as tgis

    # Get the options
    stvds_id = options["input"]
    where = options["where"]
    columns = options["columns"]
    tempwhere = options["t_where"]
    layer = options["layer"]
    separator = grass.separator(options["separator"])

    # Blank option values mean "not set"
    blank_values = ("", " ", "\n")
    if where in blank_values:
        where = None
    if columns in blank_values:
        columns = None

    # Make sure the temporal database exists
    tgis.init()

    stvds = tgis.open_old_stds(stvds_id, "stvds")
    rows = stvds.get_registered_maps(
        "name,layer,mapset,start_time,end_time", tempwhere, "start_time", None
    )
    if not rows:
        return

    last_header = ""
    for row in rows:
        vector_name = "%s@%s" % (row["name"], row["mapset"])
        # A layer defined in the vector dataset overrides the option layer
        if row["layer"]:
            layer = row["layer"]

        select = grass.read_command(
            "v.db.select",
            map=vector_name,
            layer=layer,
            columns=columns,
            separator=str(separator),
            where=where,
        )
        if not select:
            grass.fatal(
                _("Unable to run v.db.select for vector map <%s> " "with layer %s")
                % (vector_name, layer)
            )

        # Blank lines are ignored; the first remaining line holds the
        # column names
        entries = [line for line in select.split("\n") if line.strip() != ""]
        for position, entry in enumerate(entries):
            if position == 0:
                # print the column names in case they change
                header = "start_time%send_time%s%s" % (separator, separator, entry)
                if header != last_header:
                    last_header = header
                    print(last_header)
            elif row["end_time"]:
                print(
                    "%s%s%s%s%s"
                    % (row["start_time"], separator, row["end_time"], separator, entry)
                )
            else:
                print("%s%s%s%s" % (row["start_time"], separator, separator, entry))
def main(options, flags):
    """Sample a space time raster dataset at point locations.

    The sampling positions come from the ``points`` vector map, the
    ``coordinates`` option or stdin (flag ``i``). The registered raster maps
    are distributed over ``r.what`` runs executed through a parallel module
    queue, and the resulting files are merged by the writer matching the
    requested ``layout``.

    :param options: parsed module options
    :param flags: parsed module flags (``n``, ``i``, ``v``)
    """
    # lazy imports
    import grass.temporal as tgis
    import grass.pygrass.modules as pymod

    # Get the options
    points = options["points"]
    coordinates = options["coordinates"]
    strds = options["strds"]
    output = options["output"]
    where = options["where"]
    order = options["order"]
    layout = options["layout"]
    null_value = options["null_value"]
    separator = gscript.separator(options["separator"])
    nprocs = int(options["nprocs"])
    write_header = flags["n"]
    use_stdin = flags["i"]
    vcat = flags["v"]

    overwrite = gscript.overwrite()

    if coordinates and points:
        gscript.fatal(_("Options coordinates and points are mutually exclusive"))

    if not (coordinates or points or use_stdin):
        gscript.fatal(
            _(
                "Please specify the coordinates, the points option or use the 'i' flag to pipe coordinate positions to t.rast.what from stdin, to provide the sampling coordinates"
            )
        )

    if vcat and not points:
        gscript.fatal(_("Flag 'v' required option 'points'"))

    # Site names or IDs are only possible with stdin input
    site_input = False
    if use_stdin:
        coordinates_stdin = str(sys.__stdin__.read())
        # Three or more columns in the first line mean that the coordinates
        # are given with site names or IDs
        first_line_columns = len(coordinates_stdin.split("\n")[0].split())
        site_input = first_line_columns >= 3

    # Make sure the temporal database exists
    tgis.init()
    # We need a database interface
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()

    sp = tgis.open_old_stds(strds, "strds", dbif)
    maps = sp.get_registered_maps_as_objects(where=where, order=order, dbif=dbif)
    dbif.close()

    if not maps:
        gscript.fatal(_("Space time raster dataset <%s> is empty") % sp.get_id())

    # Only the 'v' flag is forwarded to r.what; the remaining setup flags
    # are disabled due to test issues
    what_flags = ""
    if vcat is True:
        what_flags += "v"

    def configure_r_what(**position_source):
        """Return an unexecuted r.what module for the given position input."""
        return pymod.Module(
            "r.what",
            map="dummy",
            output="dummy",
            run_=False,
            separator=separator,
            overwrite=overwrite,
            flags=what_flags,
            null_value=null_value,
            quiet=True,
            **position_source
        )

    # Configure the r.what module
    if points:
        r_what = configure_r_what(points=points)
    elif coordinates:
        # Create a list of values
        r_what = configure_r_what(coordinates=coordinates.split(","))
    elif use_stdin:
        r_what = configure_r_what(stdin_=coordinates_stdin)
    else:
        gscript.error(_("Please specify points or coordinates"))

    nprocs = min(nprocs, len(maps))

    # The module queue for parallel execution
    process_queue = pymod.ParallelModuleQueue(int(nprocs))
    num_maps = len(maps)

    # 400 maps is the absolute maximum in r.what, so we need to determine
    # the number of maps that can be processed in parallel.
    # First estimate the number of maps per process, using 400 maps
    # simultaneously as maximum for a single process.
    maps_per_full_loop = 400 * nprocs
    num_loops = num_maps // maps_per_full_loop
    remaining_maps = num_maps % maps_per_full_loop

    if num_loops == 0:
        num_loops = 1
        remaining_maps = 0

    # Compute the number of maps for each process
    maps_per_loop = (num_maps - remaining_maps) // num_loops
    maps_per_process = maps_per_loop // nprocs
    remaining_maps_per_loop = maps_per_loop % nprocs

    # We put the output files in an ordered list
    output_files = []
    output_time_list = []

    count = 0
    for loop in range(num_loops):
        file_name = gscript.tempfile() + "_%i" % (loop)
        count = process_loop(
            nprocs,
            maps,
            file_name,
            count,
            maps_per_process,
            remaining_maps_per_loop,
            output_files,
            output_time_list,
            r_what,
            process_queue,
        )

    process_queue.wait()

    gscript.verbose(
        "Number of raster map layers remaining for sampling %i" % (remaining_maps)
    )
    if remaining_maps > 0:
        if remaining_maps <= 100:
            # Use a single process if less than 100 maps
            # NOTE(review): this run reuses the file name of the last loop
            # and is not added to output_files here - confirm it is merged.
            map_names = [
                maps[count + offset].get_id() for offset in range(remaining_maps)
            ]
            count += remaining_maps
            mod = copy.deepcopy(r_what)
            mod(map=map_names, output=file_name)
            process_queue.put(mod)
        else:
            maps_per_process = remaining_maps // nprocs
            remaining_maps_per_loop = remaining_maps % nprocs

            file_name = "out_remain"
            process_loop(
                nprocs,
                maps,
                file_name,
                count,
                maps_per_process,
                remaining_maps_per_loop,
                output_files,
                output_time_list,
                r_what,
                process_queue,
            )

    # Wait for unfinished processes
    process_queue.wait()

    # Put the output files together in the correct order
    layout_writers = {
        "row": one_point_per_row_output,
        "col": one_point_per_col_output,
    }
    write_output = layout_writers.get(layout, one_point_per_timerow_output)
    write_output(
        separator,
        output_files,
        output_time_list,
        output,
        write_header,
        site_input,
        vcat,
    )
def main():
    """List the raster maps registered in a space time raster dataset.

    Parses the command line, rejects option combinations that cannot work
    together (separator vs. CSV, header flag vs. JSON/YAML, columns vs.
    method/format, ``order``/``where`` vs. method) and hands the listing
    over to ``tgis.list_maps_of_stds``.
    """
    options, flags = gs.parser()

    # lazy imports
    import grass.temporal as tgis

    # Get the options
    # Parser does not ensure that the input exists.
    strds_name = options["input"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gs.separator(options["separator"])
    method = options["method"]
    granule = options["granule"]
    header = flags["u"]
    output = options["output"]
    output_format = options["format"]

    single_column_output = output_format == "line" or method == "comma"

    if output_format == "csv":
        if len(separator) > 1:
            gs.fatal(
                message_option_value_excludes_option_value(
                    option_name="format",
                    option_value=output_format,
                    excluded_option_name="separator",
                    excluded_option_value=separator,
                    reason=_(
                        "A standard CSV separator (delimiter) is only one character long"
                    ),
                )
            )
        if separator == "|":
            # Comma is the CSV default, so the pipe default is overridden.
            # Users therefore cannot generate pipe-separated CSV, but unlike
            # the C API, the Python interface specs do not allow resetting
            # the default except to an empty string, which has no precedent
            # in the current code and whose behavior is unclear.
            separator = ","

    if output_format in ["json", "yaml"] and header:
        gs.fatal(
            message_option_value_excludes_flag(
                option_name="format",
                option_value=output_format,
                flag_name="u",
                reason=_("Column names are always included"),
            )
        )
    # A separator set for JSON and YAML is ignored because the default value
    # is always there (see above). Having no default and producing an error
    # when set would be clearer and would fit with using different defaults
    # for plain and CSV formats.
    elif single_column_output and separator == "|":
        # Same as for CSV: Custom default needed.
        # Pipe is currently not supported at all.
        separator = ","

    # Columns that cannot be produced depend on the listing method
    if method in ["delta", "deltagaps", "gran"]:
        if order:
            gs.fatal(
                message_option_value_excludes_option(
                    option_name="method",
                    option_value=method,
                    excluded_option_name="order",
                    reason=_("Values are always ordered by start_time"),
                )
            )
        unavailable_columns = [
            "semantic_label",
            "creator",
            "temporal_type",
            "creation_time",
            "north",
            "south",
            "west",
            "east",
            "nsres",
            "ewres",
            "cols",
            "rows",
            "number_of_cells",
            "min",
            "max",
        ]
    else:
        unavailable_columns = ["interval_length", "distance_from_begin"]

    if columns:
        requested_columns = columns.split(",")
        for column in unavailable_columns:
            if column in requested_columns:
                gs.fatal(
                    message_option_value_excludes_option_value(
                        option_name="method",
                        option_value=method,
                        excluded_option_name="columns",
                        excluded_option_value=columns,
                        reason=_(
                            "Column '{name}' is not available with the method '{method}'"
                        ).format(name=column, method=method),
                    )
                )

    if single_column_output:
        requested_columns = columns.split(",")
        if len(requested_columns) > 1:
            gs.fatal(
                message_option_value_excludes_option_value(
                    option_name="format",
                    option_value=output_format,
                    excluded_option_name="columns",
                    excluded_option_value=columns,
                    reason=_("Only one column is allowed (not {num_columns})").format(
                        num_columns=len(requested_columns)
                    ),
                )
            )

    if method == "gran" and where:
        gs.fatal(
            message_option_value_excludes_option(
                option_name="method",
                option_value=method,
                excluded_option_name="where",
                reason=_("All maps are always listed"),
            )
        )

    # Make sure the temporal database exists
    tgis.init()

    tgis.list_maps_of_stds(
        "strds",
        strds_name,
        columns,
        order,
        where,
        separator,
        method,
        header,
        granule,
        outpath=output,
        output_format=output_format,
    )
def main():
    """Collect per-segment statistics and export them as CSV and/or vector map.

    Reads the module-level ``options`` and ``flags`` and proceeds in stages:

    1. Optional geometry measures from ``r.object.geometry`` (skipped when
       the ``s`` flag is set or no measures are requested).
    2. Optional per-raster statistics, computed by ``worker`` in a process
       pool; each raster's results are then read back from
       ``<stats_temp_file>.<rastername>``. Unless the ``c`` flag is set,
       rasters that cannot be found or that contain null values are dropped
       from the list first.
    3. Optional neighborhood statistics (``n`` flag) derived from
       ``r.neighborhoodmatrix``.
    4. Output to ``csvfile`` and/or ``vectormap``. Objects whose number of
       values does not match the header are left out and reported in a
       final message.

    Side effects: sets the module globals ``insert_sql``, ``temporary_vect``,
    ``stats_temp_file`` and ``rasters`` (presumably consumed by a cleanup
    routine elsewhere in the file -- verify there).
    """
    global insert_sql, temporary_vect, stats_temp_file, rasters
    insert_sql = None
    temporary_vect = None
    stats_temp_file = None

    segment_map = options['map']
    csvfile = options['csvfile'] if options['csvfile'] else []
    vectormap = options['vectormap'] if options['vectormap'] else []
    rasters = options['rasters'].split(',') if options['rasters'] else []
    area_measures = options['area_measures'].split(',') if (
        options['area_measures'] and not flags['s']) else []
    if area_measures:
        if not gscript.find_program('r.object.geometry', '--help'):
            message = _(
                "You need to install the addon r.object.geometry to be able")
            message += _(" to calculate area measures.\n")
            message += _(
                " You can install the addon with 'g.extension r.object.geometry'"
            )
            gscript.fatal(message)
    neighborhood = bool(flags['n'])
    if neighborhood:
        if not gscript.find_program('r.neighborhoodmatrix', '--help'):
            message = _(
                "You need to install the addon r.neighborhoodmatrix to be able"
            )
            # This addon is needed for the neighborhood statistics, not for
            # the area measures handled by the check above.
            message += _(" to calculate neighborhood statistics.\n")
            message += _(
                " You can install the addon with 'g.extension r.neighborhoodmatrix'"
            )
            gscript.fatal(message)

    raster_statistics = options['raster_statistics'].split(
        ',') if options['raster_statistics'] else []
    separator = gscript.separator(options['separator'])
    processes = int(options['processes'])

    # First header field is the object id; statistics columns are appended
    # as they are computed.
    output_header = ['cat']
    # Maps object id (string) -> list of value strings, in header order.
    output_dict = collections.defaultdict(list)

    # Column index of each statistic in the '|'-separated per-raster
    # statistics files read further below.
    raster_stat_dict = {
        'zone': 0,
        'min': 4,
        'third_quart': 16,
        'max': 5,
        'sum': 12,
        'null_cells': 3,
        'median': 15,
        'label': 1,
        'first_quart': 14,
        'range': 6,
        'mean_of_abs': 8,
        'stddev': 9,
        'non_null_cells': 2,
        'coeff_var': 11,
        'variance': 10,
        'sum_abs': 13,
        'perc_90': 17,
        'mean': 7
    }

    # Column index of each measure in the r.object.geometry output file.
    geometry_stat_dict = {
        'cat': 0,
        'area': 1,
        'perimeter': 2,
        'compact_square': 3,
        'compact_circle': 4,
        'fd': 5,
        'xcoords': 6,
        'ycoords': 7
    }

    if flags['r']:
        gscript.use_temp_region()
        gscript.run_command('g.region', raster=segment_map)

    stats_temp_file = gscript.tempfile()
    if area_measures:
        gscript.message(_("Calculating geometry statistics..."))
        output_header += area_measures
        stat_indices = [geometry_stat_dict[x] for x in area_measures]
        gscript.run_command('r.object.geometry',
                            input_=segment_map,
                            output=stats_temp_file,
                            overwrite=True,
                            quiet=True)

        with open(stats_temp_file, 'r') as fin:
            # The first line is skipped (header row).
            next(fin, None)
            for line in fin:
                values = line.rstrip().split('|')
                output_dict[values[0]] = [values[x] for x in stat_indices]

    if rasters:
        if not flags['c']:
            gscript.message(_("Checking usability of raster maps..."))
            rasters_to_remove = []
            for raster in rasters:
                null_values_found = False
                if not gscript.find_file(raster, element='cell')['name']:
                    # Interpolate after the translation lookup so the
                    # untranslated template is what gettext sees.
                    gscript.message(_("Cannot find raster '%s'") % raster)
                    gscript.message(_("Removing this raster from list."))
                    rasters_to_remove.append(raster)
                    continue
                current_mapset = gscript.gisenv()['MAPSET']
                if gscript.find_file('MASK',
                                     element='cell',
                                     mapset=current_mapset)['name']:
                    # With a MASK present, look for cells that are inside
                    # the mask (value 1) but null ('*') in the raster.
                    null_test = gscript.read_command('r.stats',
                                                     flags='N',
                                                     input_=['MASK', raster],
                                                     quiet=True).splitlines()
                    if '1 *' in null_test:
                        null_values_found = True
                else:
                    raster_info = gscript.parse_command('r.univar',
                                                        flags='g',
                                                        map_=raster,
                                                        quiet=True)
                    if len(raster_info) == 0 or int(
                            raster_info['null_cells']) > 0:
                        null_values_found = True

                if null_values_found:
                    message = 'Raster <%s> contains null values.\n' % raster
                    message += 'This can lead to errors in the calculations.\n'
                    message += 'Check region settings and raster extent.\n'
                    message += 'Possibly fill null values of raster.\n'
                    message += 'Removing this raster from list.'
                    gscript.warning(message)
                    rasters_to_remove.append(raster)

            # Removal is deferred so the list is not mutated while iterating.
            for raster in rasters_to_remove:
                rasters.remove(raster)

        if len(rasters) > 0:
            gscript.message(
                _("Calculating statistics for the following raster maps:"))
            gscript.message(','.join(rasters))
            if len(rasters) < processes:
                processes = len(rasters)
                gscript.message(
                    _("Only one process per raster. Reduced number of processes to %i."
                      ) % processes)

            stat_indices = [raster_stat_dict[x] for x in raster_statistics]
            pool = Pool(processes)
            func = partial(worker, segment_map, stats_temp_file)
            pool.map(func, rasters)
            pool.close()
            pool.join()

            for raster in rasters:
                rastername = raster.split('@')[0]
                rastername = rastername.replace('.', '_')
                # Presumably the file written by worker() for this raster.
                temp_file = stats_temp_file + '.' + rastername
                output_header += [
                    rastername + "_" + x for x in raster_statistics
                ]
                with open(temp_file, 'r') as fin:
                    # The first line is skipped (header row).
                    next(fin, None)
                    for line in fin:
                        values = line.rstrip().split('|')
                        output_dict[values[0]] = output_dict[values[0]] + [
                            values[x] for x in stat_indices
                        ]

    # Calculating neighborhood statistics if requested
    if neighborhood:
        gscript.message(_("Calculating neighborhood statistics..."))

        # Add neighborhood statistics to headers
        original_nb_values = len(output_header) - 1
        new_headers = ['neighbors_count']
        for i in range(1, len(output_header)):
            new_headers.append('%s_nbrmean' % output_header[i])
            new_headers.append('%s_nbrstddev' % output_header[i])
        output_header += new_headers

        # Get sorted neighborhood matrix (groupby below needs it sorted)
        nbr_matrix = sorted([
            x.split('|')
            for x in gscript.read_command('r.neighborhoodmatrix',
                                          input_=segment_map,
                                          flags='d',
                                          quiet=True).splitlines()
        ])

        # Calculate mean and stddev of neighbor values for each variable in
        # the output_dict
        for key, group in groupby(nbr_matrix, lambda x: x[0]):
            # One running aggregate per original variable, advanced by
            # update() and turned into results by finalize().
            d = {}
            for i in range(original_nb_values):
                d[i] = (0, 0, 0)
            nbrlist = [str(x[1]) for x in group]
            if len(nbrlist) > 1:
                for nbr in nbrlist:
                    for i in range(original_nb_values):
                        d[i] = update(d[i], float(output_dict[nbr][i]))
                output_dict[key] = output_dict[key] + [str(len(nbrlist))]
                output_dict[key] = output_dict[key] + [
                    str(i) for sub in [finalize(x) for x in d.values()]
                    for i in sub
                ]
            else:
                # Single neighbor: its value is the mean, the stddev is 0.
                newvalues = ['1']
                nbr = nbrlist[0]
                for i in range(original_nb_values):
                    newvalues.append(output_dict[nbr][i])
                    newvalues.append('0')
                output_dict[key] = output_dict[key] + newvalues

    message = _("Some values could not be calculated for the objects below. ")
    message += _("These objects are thus not included in the results. ")
    message += _("HINT: Check some of the raster maps for null values ")
    message += _("and possibly fill these values with r.fillnulls.")
    error_objects = []

    if csvfile:
        with open(csvfile, 'w') as f:
            f.write(separator.join(output_header) + "\n")
            for key in output_dict:
                # Only complete rows are written; the rest are reported.
                if len(output_dict[key]) + 1 == len(output_header):
                    f.write(key + separator +
                            separator.join(output_dict[key]) + "\n")
                else:
                    error_objects.append(key)

    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = 'segmstat_tmp_vect_%d' % os.getpid()
        gscript.run_command('r.to.vect',
                            input_=segment_map,
                            output=temporary_vect,
                            type_='area',
                            flags='vt',
                            overwrite=True,
                            quiet=True)

        insert_sql = gscript.tempfile()
        # The context manager closes the file even when gscript.fatal()
        # aborts inside the block.
        with open(insert_sql, 'w') as fsql:
            fsql.write('BEGIN TRANSACTION;\n')
            if gscript.db_table_exist(temporary_vect):
                if gscript.overwrite():
                    # Trailing newline keeps one statement per line, like
                    # the other statements written to this file.
                    fsql.write('DROP TABLE %s;\n' % temporary_vect)
                else:
                    gscript.fatal(
                        _("Table %s already exists. Use --o to overwrite") %
                        temporary_vect)
            create_statement = 'CREATE TABLE ' + temporary_vect + ' (cat int PRIMARY KEY);\n'
            fsql.write(create_statement)
            for header in output_header[1:]:
                addcol_statement = 'ALTER TABLE %s ADD COLUMN %s double precision;\n' % (
                    temporary_vect, header)
                fsql.write(addcol_statement)
            for key in output_dict:
                if len(output_dict[key]) + 1 == len(output_header):
                    sql = "INSERT INTO %s VALUES (%s, %s);\n" % (
                        temporary_vect, key, ",".join(output_dict[key]))
                    # Non-finite values are stored as NULL.
                    sql = sql.replace('inf', 'NULL')
                    sql = sql.replace('nan', 'NULL')
                    fsql.write(sql)
                else:
                    # Already collected while writing the CSV, if any.
                    if not csvfile:
                        error_objects.append(key)

            fsql.write('END TRANSACTION;')

        gscript.run_command('db.execute', input=insert_sql, quiet=True)
        gscript.run_command('v.db.connect',
                            map_=temporary_vect,
                            table=temporary_vect,
                            quiet=True)
        gscript.run_command('g.copy',
                            vector="%s,%s" % (temporary_vect, vectormap),
                            quiet=True)

    if error_objects:
        object_string = ', '.join(error_objects[:100])
        message += _(
            "\n\nObjects with errors (only first 100 are shown):\n%s"
        ) % object_string
        gscript.message(message)