def calcNumericStats(self, features, feedback, field, count):
    """Accumulate numeric statistics for *field* over *features*.

    Returns a tuple ``(data, results)``: *data* is a list of translated,
    human-readable summary strings, *results* maps the algorithm's output
    keys to the raw statistic values.
    """
    # Guard against an empty layer so the progress step never divides by zero.
    step = 100.0 / count if count else 0
    summary = QgsStatisticalSummary()
    for idx, feat in enumerate(features):
        if feedback.isCanceled():
            break
        summary.addVariant(feat[field.name()])
        feedback.setProgress(int(idx * step))
    summary.finalize()

    mean = summary.mean()
    std_dev = summary.stDev()
    # The coefficient of variation is undefined for a zero mean; report 0.
    cv = std_dev / mean if mean != 0 else 0

    results = {
        self.COUNT: summary.count(),
        self.UNIQUE: summary.variety(),
        self.EMPTY: summary.countMissing(),
        self.FILLED: count - summary.countMissing(),
        self.MIN: summary.min(),
        self.MAX: summary.max(),
        self.RANGE: summary.range(),
        self.SUM: summary.sum(),
        self.MEAN: mean,
        self.MEDIAN: summary.median(),
        self.STD_DEV: std_dev,
        self.CV: cv,
        self.MINORITY: summary.minority(),
        self.MAJORITY: summary.majority(),
        self.FIRSTQUARTILE: summary.firstQuartile(),
        self.THIRDQUARTILE: summary.thirdQuartile(),
        self.IQR: summary.interQuartileRange(),
    }

    data = [
        self.tr('Count: {}').format(summary.count()),
        self.tr('Unique values: {}').format(summary.variety()),
        self.tr('NULL (missing) values: {}').format(summary.countMissing()),
        self.tr('Minimum value: {}').format(summary.min()),
        self.tr('Maximum value: {}').format(summary.max()),
        self.tr('Range: {}').format(summary.range()),
        self.tr('Sum: {}').format(summary.sum()),
        self.tr('Mean value: {}').format(mean),
        self.tr('Median value: {}').format(summary.median()),
        self.tr('Standard deviation: {}').format(std_dev),
        self.tr('Coefficient of Variation: {}').format(cv),
        self.tr('Minority (rarest occurring value): {}').format(summary.minority()),
        self.tr('Majority (most frequently occurring value): {}').format(summary.majority()),
        self.tr('First quartile: {}').format(summary.firstQuartile()),
        self.tr('Third quartile: {}').format(summary.thirdQuartile()),
        self.tr('Interquartile Range (IQR): {}').format(summary.interQuartileRange()),
    ]
    return data, results
def calcNumericStats(self, features, progress, field):
    """Compute numeric statistics of *field* over *features*.

    Statistic values are published through ``self.setOutputValue`` and a
    list of translated, human-readable summary strings is returned.
    """
    count = len(features)
    # Guard the progress step against an empty feature list; the previous
    # unconditional division raised ZeroDivisionError for empty layers.
    total = 100.0 / count if count else 0
    stat = QgsStatisticalSummary()
    for current, ft in enumerate(features):
        stat.addVariant(ft[field.name()])
        progress.setPercentage(int(current * total))
    stat.finalize()
    # The coefficient of variation is undefined for a zero mean; report 0.
    cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0
    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.variety())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.FILLED, count - stat.countMissing())
    self.setOutputValue(self.MIN, stat.min())
    self.setOutputValue(self.MAX, stat.max())
    self.setOutputValue(self.RANGE, stat.range())
    self.setOutputValue(self.SUM, stat.sum())
    self.setOutputValue(self.MEAN, stat.mean())
    self.setOutputValue(self.MEDIAN, stat.median())
    self.setOutputValue(self.STD_DEV, stat.stDev())
    self.setOutputValue(self.CV, cv)
    self.setOutputValue(self.MINORITY, stat.minority())
    self.setOutputValue(self.MAJORITY, stat.majority())
    self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
    self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
    self.setOutputValue(self.IQR, stat.interQuartileRange())
    data = []
    data.append(self.tr('Count: {}').format(stat.count()))
    data.append(self.tr('Unique values: {}').format(stat.variety()))
    data.append(
        self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Minimum value: {}').format(stat.min()))
    data.append(self.tr('Maximum value: {}').format(stat.max()))
    data.append(self.tr('Range: {}').format(stat.range()))
    data.append(self.tr('Sum: {}').format(stat.sum()))
    data.append(self.tr('Mean value: {}').format(stat.mean()))
    data.append(self.tr('Median value: {}').format(stat.median()))
    data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
    data.append(self.tr('Coefficient of Variation: {}').format(cv))
    data.append(
        self.tr('Minority (rarest occurring value): {}').format(
            stat.minority()))
    data.append(
        self.tr('Majority (most frequently occurring value): {}').format(
            stat.majority()))
    data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
    data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
    data.append(
        self.tr('Interquartile Range (IQR): {}').format(
            stat.interQuartileRange()))
    return data
def calcNumericStats(self, features, feedback, field, count):
    """Gather numeric statistics for *field* across *features*.

    Returns ``(data, results)`` where *data* holds translated display
    strings and *results* maps the output keys to raw statistic values.
    """
    # Avoid a zero division when the layer has no features.
    progress_step = 100.0 / count if count else 0
    summary = QgsStatisticalSummary()
    for position, feature in enumerate(features):
        if feedback.isCanceled():
            break
        summary.addVariant(feature[field.name()])
        feedback.setProgress(int(position * progress_step))
    summary.finalize()

    mean_value = summary.mean()
    # A zero mean makes the coefficient of variation undefined; use 0.
    variation = summary.stDev() / mean_value if mean_value != 0 else 0

    results = {}
    results[self.COUNT] = summary.count()
    results[self.UNIQUE] = summary.variety()
    results[self.EMPTY] = summary.countMissing()
    results[self.FILLED] = count - summary.countMissing()
    results[self.MIN] = summary.min()
    results[self.MAX] = summary.max()
    results[self.RANGE] = summary.range()
    results[self.SUM] = summary.sum()
    results[self.MEAN] = mean_value
    results[self.MEDIAN] = summary.median()
    results[self.STD_DEV] = summary.stDev()
    results[self.CV] = variation
    results[self.MINORITY] = summary.minority()
    results[self.MAJORITY] = summary.majority()
    results[self.FIRSTQUARTILE] = summary.firstQuartile()
    results[self.THIRDQUARTILE] = summary.thirdQuartile()
    results[self.IQR] = summary.interQuartileRange()

    data = [
        self.tr('Count: {}').format(summary.count()),
        self.tr('Unique values: {}').format(summary.variety()),
        self.tr('NULL (missing) values: {}').format(summary.countMissing()),
        self.tr('Minimum value: {}').format(summary.min()),
        self.tr('Maximum value: {}').format(summary.max()),
        self.tr('Range: {}').format(summary.range()),
        self.tr('Sum: {}').format(summary.sum()),
        self.tr('Mean value: {}').format(mean_value),
        self.tr('Median value: {}').format(summary.median()),
        self.tr('Standard deviation: {}').format(summary.stDev()),
        self.tr('Coefficient of Variation: {}').format(variation),
        self.tr('Minority (rarest occurring value): {}').format(summary.minority()),
        self.tr('Majority (most frequently occurring value): {}').format(summary.majority()),
        self.tr('First quartile: {}').format(summary.firstQuartile()),
        self.tr('Third quartile: {}').format(summary.thirdQuartile()),
        self.tr('Interquartile Range (IQR): {}').format(summary.interQuartileRange()),
    ]
    return data, results
def calcNumericStats(self, features, progress, field):
    """Compute numeric statistics of *field* over *features*.

    Each statistic is published through ``self.setOutputValue``; the method
    returns a list of translated, human-readable summary strings.
    """
    count = len(features)
    # Guard against an empty feature list: the unconditional division
    # previously raised ZeroDivisionError for empty layers.
    total = 100.0 / count if count else 0
    stat = QgsStatisticalSummary()
    for current, ft in enumerate(features):
        stat.addVariant(ft[field.name()])
        progress.setPercentage(int(current * total))
    stat.finalize()
    # The coefficient of variation is undefined for a zero mean; report 0.
    cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0
    self.setOutputValue(self.COUNT, stat.count())
    self.setOutputValue(self.UNIQUE, stat.variety())
    self.setOutputValue(self.EMPTY, stat.countMissing())
    self.setOutputValue(self.FILLED, count - stat.countMissing())
    self.setOutputValue(self.MIN, stat.min())
    self.setOutputValue(self.MAX, stat.max())
    self.setOutputValue(self.RANGE, stat.range())
    self.setOutputValue(self.SUM, stat.sum())
    self.setOutputValue(self.MEAN, stat.mean())
    self.setOutputValue(self.MEDIAN, stat.median())
    self.setOutputValue(self.STD_DEV, stat.stDev())
    self.setOutputValue(self.CV, cv)
    self.setOutputValue(self.MINORITY, stat.minority())
    self.setOutputValue(self.MAJORITY, stat.majority())
    self.setOutputValue(self.FIRSTQUARTILE, stat.firstQuartile())
    self.setOutputValue(self.THIRDQUARTILE, stat.thirdQuartile())
    self.setOutputValue(self.IQR, stat.interQuartileRange())
    data = []
    data.append(self.tr('Count: {}').format(stat.count()))
    data.append(self.tr('Unique values: {}').format(stat.variety()))
    data.append(self.tr('NULL (missing) values: {}').format(stat.countMissing()))
    data.append(self.tr('Minimum value: {}').format(stat.min()))
    data.append(self.tr('Maximum value: {}').format(stat.max()))
    data.append(self.tr('Range: {}').format(stat.range()))
    data.append(self.tr('Sum: {}').format(stat.sum()))
    data.append(self.tr('Mean value: {}').format(stat.mean()))
    data.append(self.tr('Median value: {}').format(stat.median()))
    data.append(self.tr('Standard deviation: {}').format(stat.stDev()))
    data.append(self.tr('Coefficient of Variation: {}').format(cv))
    data.append(self.tr('Minority (rarest occurring value): {}').format(stat.minority()))
    data.append(self.tr('Majority (most frequently occurring value): {}').format(stat.majority()))
    data.append(self.tr('First quartile: {}').format(stat.firstQuartile()))
    data.append(self.tr('Third quartile: {}').format(stat.thirdQuartile()))
    data.append(self.tr('Interquartile Range (IQR): {}').format(stat.interQuartileRange()))
    return data
def processAlgorithm(self, parameters, context, feedback):
    """Join attributes by location, appending summary statistics.

    For every input feature, the selected fields of all join-layer features
    matching one of the chosen spatial predicates are collected, summarised
    (numeric / datetime / string statistics depending on the field type),
    and the input feature plus the summary attributes is written to the sink.
    Raises QgsProcessingException if a source or the sink cannot be created.
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))
    join_source = self.parameterAsSource(parameters, self.JOIN, context)
    if join_source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.JOIN))
    join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
    discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
    # Keep the user's statistic selection in declaration order.
    summaries = [
        self.statistics[i][0] for i in sorted(
            self.parameterAsEnums(parameters, self.SUMMARIES, context))
    ]
    if not summaries:
        # none selected, so use all
        summaries = [s[0] for s in self.statistics]
    source_fields = source.fields()
    fields_to_join = QgsFields()
    join_field_indexes = []
    if not join_fields:
        # no fields selected, use all
        join_fields = [
            join_source.fields().at(i).name() for i in range(len(join_source.fields()))
        ]

    def addFieldKeepType(original, stat):
        """
        Adds a field to the output, keeping the same data type as the original
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        fields_to_join.append(field)

    def addField(original, stat, type):
        """
        Adds a field to the output, with a specified type
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        field.setType(type)
        if type == QVariant.Double:
            # Fixed width/precision for all floating-point summary fields.
            field.setLength(20)
            field.setPrecision(6)
        fields_to_join.append(field)

    # Each entry: (summary key, output field type, statistics-object getter).
    # A None type means "keep the original field's type"; entries without a
    # third element ('filled') are computed specially below.
    numeric_fields = (('count', QVariant.Int, 'count'),
                      ('unique', QVariant.Int, 'variety'),
                      ('min', QVariant.Double, 'min'),
                      ('max', QVariant.Double, 'max'),
                      ('range', QVariant.Double, 'range'),
                      ('sum', QVariant.Double, 'sum'),
                      ('mean', QVariant.Double, 'mean'),
                      ('median', QVariant.Double, 'median'),
                      ('stddev', QVariant.Double, 'stDev'),
                      ('minority', QVariant.Double, 'minority'),
                      ('majority', QVariant.Double, 'majority'),
                      ('q1', QVariant.Double, 'firstQuartile'),
                      ('q3', QVariant.Double, 'thirdQuartile'),
                      ('iqr', QVariant.Double, 'interQuartileRange'))
    datetime_fields = (('count', QVariant.Int, 'count'),
                       ('unique', QVariant.Int, 'countDistinct'),
                       ('empty', QVariant.Int, 'countMissing'),
                       ('filled', QVariant.Int),
                       ('min', None),
                       ('max', None))
    string_fields = (('count', QVariant.Int, 'count'),
                     ('unique', QVariant.Int, 'countDistinct'),
                     ('empty', QVariant.Int, 'countMissing'),
                     ('filled', QVariant.Int),
                     ('min', None, 'min'),
                     ('max', None, 'max'),
                     ('min_length', QVariant.Int, 'minLength'),
                     ('max_length', QVariant.Int, 'maxLength'),
                     ('mean_length', QVariant.Double, 'meanLength'))

    field_types = []
    for f in join_fields:
        idx = join_source.fields().lookupField(f)
        if idx >= 0:
            join_field_indexes.append(idx)
            join_field = join_source.fields().at(idx)
            if join_field.isNumeric():
                field_types.append('numeric')
                field_list = numeric_fields
            elif join_field.type() in (QVariant.Date, QVariant.Time,
                                       QVariant.DateTime):
                field_types.append('datetime')
                field_list = datetime_fields
            else:
                field_types.append('string')
                field_list = string_fields
            # NOTE: this inner loop reuses the name `f`, shadowing the outer
            # loop variable; benign because the outer `f` is only read above.
            for f in field_list:
                if f[0] in summaries:
                    if f[1] is not None:
                        addField(join_field, f[0], f[1])
                    else:
                        addFieldKeepType(join_field, f[0])

    out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)
    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           out_fields, source.wkbType(),
                                           source.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    # do the join
    predicates = [
        self.predicates[i][0]
        for i in self.parameterAsEnums(parameters, self.PREDICATE, context)
    ]
    features = source.getFeatures()
    # Avoid dividing by zero when the source layer is empty.
    total = 100.0 / source.featureCount() if source.featureCount() else 0
    # bounding box transform
    bbox_transform = QgsCoordinateTransform(source.sourceCrs(),
                                            join_source.sourceCrs(),
                                            context.project())
    for current, f in enumerate(features):
        if feedback.isCanceled():
            break
        if not f.hasGeometry():
            if not discard_nomatch:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
            continue
        # Candidate pre-filter: bbox expressed in the join layer's CRS.
        bbox = bbox_transform.transformBoundingBox(
            f.geometry().boundingBox())
        engine = None
        values = []
        request = QgsFeatureRequest().setFilterRect(
            bbox).setSubsetOfAttributes(
                join_field_indexes).setDestinationCrs(
                    source.sourceCrs(), context.transformContext())
        for test_feat in join_source.getFeatures(request):
            if feedback.isCanceled():
                break
            join_attributes = []
            for a in join_field_indexes:
                join_attributes.append(test_feat.attributes()[a])
            if engine is None:
                # Prepared geometry engine is built lazily, once per feature.
                engine = QgsGeometry.createGeometryEngine(
                    f.geometry().constGet())
                engine.prepareGeometry()
            for predicate in predicates:
                if getattr(engine, predicate)(test_feat.geometry().constGet()):
                    values.append(join_attributes)
                    break
        feedback.setProgress(int(current * total))
        if len(values) == 0:
            if discard_nomatch:
                continue
            else:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
        else:
            attrs = f.attributes()
            for i in range(len(join_field_indexes)):
                attribute_values = [v[i] for v in values]
                field_type = field_types[i]
                if field_type == 'numeric':
                    stat = QgsStatisticalSummary()
                    for v in attribute_values:
                        stat.addVariant(v)
                    stat.finalize()
                    for s in numeric_fields:
                        if s[0] in summaries:
                            attrs.append(getattr(stat, s[2])())
                elif field_type == 'datetime':
                    stat = QgsDateTimeStatisticalSummary()
                    stat.calculate(attribute_values)
                    for s in datetime_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                # 'filled' is derived, not a getter on the
                                # statistics object.
                                attrs.append(stat.count() - stat.countMissing())
                            elif s[0] == 'min':
                                attrs.append(
                                    stat.statistic(
                                        QgsDateTimeStatisticalSummary.Min))
                            elif s[0] == 'max':
                                attrs.append(
                                    stat.statistic(
                                        QgsDateTimeStatisticalSummary.Max))
                            else:
                                attrs.append(getattr(stat, s[2])())
                else:
                    stat = QgsStringStatisticalSummary()
                    for v in attribute_values:
                        # NULL attribute values are folded into empty strings.
                        if v == NULL:
                            stat.addString('')
                        else:
                            stat.addString(str(v))
                    stat.finalize()
                    for s in string_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                attrs.append(stat.count() - stat.countMissing())
                            else:
                                attrs.append(getattr(stat, s[2])())
            f.setAttributes(attrs)
            sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def processAlgorithm(self, feedback):
    """Compute basic statistics for a numeric field and write an HTML report.

    Reads the input layer and field name from the algorithm parameters,
    accumulates statistics with QgsStatisticalSummary, writes a translated
    summary to the HTML output file and publishes each statistic as an
    algorithm output value.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)
    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    # Geometry is not needed: fetch only the analyzed attribute.
    request = QgsFeatureRequest().setFlags(
        QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                            layer.fields())
    stat = QgsStatisticalSummary()
    features = vector.features(layer, request)
    count = len(features)
    # Guard against an empty layer: the previous unconditional division
    # raised ZeroDivisionError when no features were available.
    total = 100.0 / count if count else 0
    for current, ft in enumerate(features):
        stat.addVariant(ft[fieldName])
        feedback.setProgress(int(current * total))
    stat.finalize()

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    # The coefficient of variation is undefined for a zero mean; report 0.
    cvValue = stdDevValue / meanValue if meanValue != 0 else 0
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()
    nullValues = stat.countMissing()

    data = []
    data.append(self.tr('Analyzed layer: {}').format(layer.name()))
    data.append(self.tr('Analyzed field: {}').format(fieldName))
    data.append(self.tr('Count: {}').format(count))
    data.append(self.tr('Unique values: {}').format(uniqueValue))
    data.append(self.tr('Minimum value: {}').format(minValue))
    data.append(self.tr('Maximum value: {}').format(maxValue))
    data.append(self.tr('Range: {}').format(rValue))
    data.append(self.tr('Sum: {}').format(sumValue))
    data.append(self.tr('Mean value: {}').format(meanValue))
    data.append(self.tr('Median value: {}').format(medianValue))
    data.append(self.tr('Standard deviation: {}').format(stdDevValue))
    data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
    data.append(
        self.tr('Minority (rarest occurring value): {}').format(minority))
    data.append(
        self.tr('Majority (most frequently occurring value): {}').format(
            majority))
    data.append(self.tr('First quartile: {}').format(firstQuartile))
    data.append(self.tr('Third quartile: {}').format(thirdQuartile))
    data.append(self.tr('NULL (missing) values: {}').format(nullValues))
    data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

    self.createHTML(outputFile, data)

    # NOTE(review): cvValue is reported in the HTML but has no corresponding
    # setOutputValue here — confirm whether a CV output is expected.
    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.NULLVALUES, nullValues)
    self.setOutputValue(self.IQR, iqr)
def processAlgorithm(self, progress):
    """Compute basic statistics for a numeric field and write an HTML report.

    Reads the input layer and field name from the algorithm parameters,
    accumulates statistics with QgsStatisticalSummary, writes a translated
    summary to the HTML output file and publishes each statistic as an
    algorithm output value.
    """
    layer = dataobjects.getObjectFromUri(
        self.getParameterValue(self.INPUT_LAYER))
    fieldName = self.getParameterValue(self.FIELD_NAME)
    outputFile = self.getOutputValue(self.OUTPUT_HTML_FILE)

    # Geometry is not needed: fetch only the analyzed attribute.
    request = QgsFeatureRequest().setFlags(
        QgsFeatureRequest.NoGeometry).setSubsetOfAttributes([fieldName],
                                                            layer.fields())
    stat = QgsStatisticalSummary()
    features = vector.features(layer, request)
    count = len(features)
    # Guard against an empty layer: the previous unconditional division
    # raised ZeroDivisionError when no features were available.
    total = 100.0 / count if count else 0
    for current, ft in enumerate(features):
        stat.addVariant(ft[fieldName])
        progress.setPercentage(int(current * total))
    stat.finalize()

    count = stat.count()
    uniqueValue = stat.variety()
    minValue = stat.min()
    maxValue = stat.max()
    rValue = stat.range()
    sumValue = stat.sum()
    meanValue = stat.mean()
    medianValue = stat.median()
    stdDevValue = stat.stDev()
    # The coefficient of variation is undefined for a zero mean; report 0.
    cvValue = stdDevValue / meanValue if meanValue != 0 else 0
    minority = stat.minority()
    majority = stat.majority()
    firstQuartile = stat.firstQuartile()
    thirdQuartile = stat.thirdQuartile()
    iqr = stat.interQuartileRange()
    nullValues = stat.countMissing()

    data = []
    data.append(self.tr('Analyzed layer: {}').format(layer.name()))
    data.append(self.tr('Analyzed field: {}').format(fieldName))
    data.append(self.tr('Count: {}').format(count))
    data.append(self.tr('Unique values: {}').format(uniqueValue))
    data.append(self.tr('Minimum value: {}').format(minValue))
    data.append(self.tr('Maximum value: {}').format(maxValue))
    data.append(self.tr('Range: {}').format(rValue))
    data.append(self.tr('Sum: {}').format(sumValue))
    data.append(self.tr('Mean value: {}').format(meanValue))
    data.append(self.tr('Median value: {}').format(medianValue))
    data.append(self.tr('Standard deviation: {}').format(stdDevValue))
    data.append(self.tr('Coefficient of Variation: {}').format(cvValue))
    data.append(self.tr('Minority (rarest occurring value): {}').format(minority))
    data.append(self.tr('Majority (most frequently occurring value): {}').format(majority))
    data.append(self.tr('First quartile: {}').format(firstQuartile))
    data.append(self.tr('Third quartile: {}').format(thirdQuartile))
    data.append(self.tr('NULL (missing) values: {}').format(nullValues))
    data.append(self.tr('Interquartile Range (IQR): {}').format(iqr))

    self.createHTML(outputFile, data)

    self.setOutputValue(self.COUNT, count)
    self.setOutputValue(self.UNIQUE, uniqueValue)
    self.setOutputValue(self.MIN, minValue)
    self.setOutputValue(self.MAX, maxValue)
    self.setOutputValue(self.RANGE, rValue)
    self.setOutputValue(self.SUM, sumValue)
    self.setOutputValue(self.MEAN, meanValue)
    self.setOutputValue(self.MEDIAN, medianValue)
    self.setOutputValue(self.STD_DEV, stdDevValue)
    self.setOutputValue(self.MINORITY, minority)
    self.setOutputValue(self.MAJORITY, majority)
    self.setOutputValue(self.FIRSTQUARTILE, firstQuartile)
    self.setOutputValue(self.THIRDQUARTILE, thirdQuartile)
    self.setOutputValue(self.NULLVALUES, nullValues)
    self.setOutputValue(self.IQR, iqr)
def processAlgorithm(self, parameters, context, feedback):
    """Join attributes by location, appending summary statistics.

    For every input feature, the selected fields of all join-layer features
    matching one of the chosen spatial predicates are collected, summarised
    (numeric / datetime / string statistics depending on the field type),
    and the input feature plus the summary attributes is written to the sink.

    Raises QgsProcessingException when a source or the sink cannot be
    created — previously a None source/sink propagated as an opaque
    AttributeError deep inside execution.
    """
    source = self.parameterAsSource(parameters, self.INPUT, context)
    if source is None:
        # Fail early with a clear processing error instead of an
        # AttributeError on source.fields() below.
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.INPUT))
    join_source = self.parameterAsSource(parameters, self.JOIN, context)
    if join_source is None:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.JOIN))
    join_fields = self.parameterAsFields(parameters, self.JOIN_FIELDS, context)
    discard_nomatch = self.parameterAsBool(parameters, self.DISCARD_NONMATCHING, context)
    summaries = [self.statistics[i][0]
                 for i in sorted(self.parameterAsEnums(parameters, self.SUMMARIES, context))]
    if not summaries:
        # none selected, so use all
        summaries = [s[0] for s in self.statistics]
    source_fields = source.fields()
    fields_to_join = QgsFields()
    join_field_indexes = []
    if not join_fields:
        # no fields selected, use all
        join_fields = [join_source.fields().at(i).name()
                       for i in range(len(join_source.fields()))]

    def addFieldKeepType(original, stat):
        """
        Adds a field to the output, keeping the same data type as the original
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        fields_to_join.append(field)

    def addField(original, stat, type):
        """
        Adds a field to the output, with a specified type
        """
        field = QgsField(original)
        field.setName(field.name() + '_' + stat)
        field.setType(type)
        if type == QVariant.Double:
            field.setLength(20)
            field.setPrecision(6)
        fields_to_join.append(field)

    # Each entry: (summary key, output field type, statistics getter name).
    # A None type means "keep the original field type"; 'filled' has no
    # getter and is derived below as count - countMissing.
    numeric_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'variety'),
        ('min', QVariant.Double, 'min'),
        ('max', QVariant.Double, 'max'),
        ('range', QVariant.Double, 'range'),
        ('sum', QVariant.Double, 'sum'),
        ('mean', QVariant.Double, 'mean'),
        ('median', QVariant.Double, 'median'),
        ('stddev', QVariant.Double, 'stDev'),
        ('minority', QVariant.Double, 'minority'),
        ('majority', QVariant.Double, 'majority'),
        ('q1', QVariant.Double, 'firstQuartile'),
        ('q3', QVariant.Double, 'thirdQuartile'),
        ('iqr', QVariant.Double, 'interQuartileRange')
    )
    datetime_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None),
        ('max', None)
    )
    string_fields = (
        ('count', QVariant.Int, 'count'),
        ('unique', QVariant.Int, 'countDistinct'),
        ('empty', QVariant.Int, 'countMissing'),
        ('filled', QVariant.Int),
        ('min', None, 'min'),
        ('max', None, 'max'),
        ('min_length', QVariant.Int, 'minLength'),
        ('max_length', QVariant.Int, 'maxLength'),
        ('mean_length', QVariant.Double, 'meanLength')
    )

    field_types = []
    for f in join_fields:
        idx = join_source.fields().lookupField(f)
        if idx >= 0:
            join_field_indexes.append(idx)
            join_field = join_source.fields().at(idx)
            if join_field.isNumeric():
                field_types.append('numeric')
                field_list = numeric_fields
            elif join_field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime):
                field_types.append('datetime')
                field_list = datetime_fields
            else:
                field_types.append('string')
                field_list = string_fields
            for f in field_list:
                if f[0] in summaries:
                    if f[1] is not None:
                        addField(join_field, f[0], f[1])
                    else:
                        addFieldKeepType(join_field, f[0])

    out_fields = QgsProcessingUtils.combineFields(source_fields, fields_to_join)
    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           out_fields, source.wkbType(), source.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))

    # do the join
    predicates = [self.predicates[i][0]
                  for i in self.parameterAsEnums(parameters, self.PREDICATE, context)]
    features = source.getFeatures()
    total = 100.0 / source.featureCount() if source.featureCount() else 0
    # bounding box transform
    bbox_transform = QgsCoordinateTransform(source.sourceCrs(),
                                            join_source.sourceCrs(),
                                            context.project())
    for current, f in enumerate(features):
        if feedback.isCanceled():
            break
        if not f.hasGeometry():
            if not discard_nomatch:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
            continue
        bbox = bbox_transform.transformBoundingBox(f.geometry().boundingBox())
        engine = None
        values = []
        request = QgsFeatureRequest().setFilterRect(bbox).setSubsetOfAttributes(
            join_field_indexes).setDestinationCrs(source.sourceCrs(),
                                                  context.transformContext())
        for test_feat in join_source.getFeatures(request):
            if feedback.isCanceled():
                break
            join_attributes = []
            for a in join_field_indexes:
                join_attributes.append(test_feat.attributes()[a])
            if engine is None:
                # Build the prepared geometry engine lazily, once per feature.
                engine = QgsGeometry.createGeometryEngine(f.geometry().constGet())
                engine.prepareGeometry()
            for predicate in predicates:
                if getattr(engine, predicate)(test_feat.geometry().constGet()):
                    values.append(join_attributes)
                    break
        feedback.setProgress(int(current * total))
        if len(values) == 0:
            if discard_nomatch:
                continue
            else:
                # ensure consistent count of attributes - otherwise non matching
                # features will have incorrect attribute length
                # and provider may reject them
                attrs = f.attributes()
                if len(attrs) < len(out_fields):
                    attrs += [NULL] * (len(out_fields) - len(attrs))
                f.setAttributes(attrs)
                sink.addFeature(f, QgsFeatureSink.FastInsert)
        else:
            attrs = f.attributes()
            for i in range(len(join_field_indexes)):
                attribute_values = [v[i] for v in values]
                field_type = field_types[i]
                if field_type == 'numeric':
                    stat = QgsStatisticalSummary()
                    for v in attribute_values:
                        stat.addVariant(v)
                    stat.finalize()
                    for s in numeric_fields:
                        if s[0] in summaries:
                            attrs.append(getattr(stat, s[2])())
                elif field_type == 'datetime':
                    stat = QgsDateTimeStatisticalSummary()
                    stat.calculate(attribute_values)
                    for s in datetime_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                attrs.append(stat.count() - stat.countMissing())
                            elif s[0] == 'min':
                                attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Min))
                            elif s[0] == 'max':
                                attrs.append(stat.statistic(QgsDateTimeStatisticalSummary.Max))
                            else:
                                attrs.append(getattr(stat, s[2])())
                else:
                    stat = QgsStringStatisticalSummary()
                    for v in attribute_values:
                        # NULL attribute values are folded into empty strings.
                        if v == NULL:
                            stat.addString('')
                        else:
                            stat.addString(str(v))
                    stat.finalize()
                    for s in string_fields:
                        if s[0] in summaries:
                            if s[0] == 'filled':
                                attrs.append(stat.count() - stat.countMissing())
                            else:
                                attrs.append(getattr(stat, s[2])())
            f.setAttributes(attrs)
            sink.addFeature(f, QgsFeatureSink.FastInsert)

    return {self.OUTPUT: dest_id}
def processAlgorithm(self, parameters, context, feedback):
    '''
    Here is where the processing itself takes place.

    Builds stacked profiles: groups input points by line attribute, spools
    each line to a temporary CSV, computes per-line profiles scaled by the
    global (or scaling-channel) data statistics, and writes LineStringM
    features to the output sink.
    '''
    #
    if not is_dependencies_satisfied:
        return {}
    # Init
    # The number of features in the input layer could be trimmed to user selection.
    the_layer = self.parameterAsSource(parameters, self.THE_LAYER, context)
    # Only point geometry layers are accepted.
    gok = QgsWkbTypes.geometryType(
        the_layer.wkbType()) == QgsWkbTypes.PointGeometry
    if the_layer is None or not gok:
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.THE_LAYER))
    #
    bCHscal = self.parameterAsBool(parameters, self.BSCALE, context)
    if bCHscal:
        # Use another channel for scaling. All data from that channel will be used.
        scally = self.parameterAsSource(parameters, self.SCALLY, context)
        if scally is None or the_layer.wkbType() != QgsWkbTypes.Point:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.SCALLY))
    fidu_fld = self.parameterAsString(parameters, self.FID_FLD, context)
    data_fld = self.parameterAsString(parameters, self.DATA_FLD, context)
    line_fld = self.parameterAsString(parameters, self.LINE_FLD, context)
    invP = self.parameterAsBool(parameters, self.INVERTP, context)
    dumval = self.parameterAsDouble(parameters, self.DUMVAL, context)
    scale = self.parameterAsDouble(parameters, self.SCALE, context)
    offset = self.parameterAsDouble(parameters, self.OFFSET, context)
    join_to_line = self.parameterAsBool(parameters, self.JOINL, context)
    # Field definitions for the data and fiducial columns; both must be numeric.
    data = the_layer.fields().at(the_layer.fields().lookupField(data_fld))
    fidu = the_layer.fields().at(the_layer.fields().lookupField(fidu_fld))
    if not data.isNumeric() or not fidu.isNumeric():
        raise QgsProcessingException(
            self.invalidSourceError(parameters, self.THE_LAYER))
    line = the_layer.fields().at(the_layer.fields().lookupField(line_fld))
    data_ix = the_layer.fields().lookupField(data_fld)
    line_ix = the_layer.fields().lookupField(line_fld)
    fidu_ix = the_layer.fields().lookupField(fidu_fld)
    # Set output vector layer: point(X, Y, M) M is data value at that point
    output_wkb = QgsWkbTypes.LineString
    output_wkb = QgsWkbTypes.addM(output_wkb)
    # Fields of stacked profiles vector
    line_def = the_layer.fields().at(line_ix)
    fields = QgsFields()
    if line_def is not None:
        fields = QgsFields()
        fields.append(QgsField('Line', QVariant.String, '', 16))
        fields.append(QgsField('Type', QVariant.String, '', 2))
        fields.append(QgsField('NbPts', QVariant.Int, '', 10, 0))
        fields.append(QgsField('Azimuth', QVariant.Double, '', 10, 6))
        fields.append(QgsField('DistEP', QVariant.Double, '', 10, 2))
        fields.append(QgsField('Length', QVariant.Double, '', 10, 2))
    (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT, context,
                                           fields, output_wkb,
                                           the_layer.sourceCrs())
    if sink is None:
        raise QgsProcessingException(
            self.invalidSinkError(parameters, self.OUTPUT))
    # Get the features and fields of interest
    features = the_layer.getFeatures(
        QgsFeatureRequest().setSubsetOfAttributes(
            [fidu_ix, line_ix, data_ix]),
        QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)
    # CSV
    # Find min/max of data values for all lines and save each line in a csv file
    # Then process each line separately: can have any number of lines...
    lines = []       # [line name, point count] per completed line
    xyzf = []        # pending [x, y, fid, data] rows of the current line
    lineN = ''       # name of the line currently being accumulated
    nL = 0           # number of points in the current line
    TL = 0.          # longest end-to-end line distance seen so far
    # First pass uses 60% of the progress bar; the profile pass uses 40%.
    total = 60.0 / the_layer.featureCount() if the_layer.featureCount(
    ) else 0
    stat = QgsStatisticalSummary()
    for current, ft in enumerate(features):
        if feedback.isCanceled():
            break
        feedback.setProgress(int(current * total))
        if not ft.hasGeometry():
            continue
        #
        # A change of line attribute flushes the accumulated points to CSV.
        # Assumes the input is ordered so each line's points are contiguous.
        if ft[line.name()] != lineN:
            if xyzf != []:
                lines.append([lineN, nL])
                the_csv = os.path.join(self.tmpDir, '%s.csv' % str(lineN))
                with codecs.open(the_csv, 'w', 'utf-8') as fo:
                    fo.write('X,Y,FID,Data\n')
                    for ar in xyzf:
                        fo.write(','.join(map(str, ar)))
                        fo.write('\n')
                # End-to-end distance of the flushed line; track the longest.
                le = sqrt((xyzf[0][0] - xyzf[-1][0])**2 +
                          (xyzf[0][1] - xyzf[-1][1])**2)
                if le > TL:
                    TL = le
                xyzf = []
                nL = 0
            lineN = ft[line.name()]
        #
        rdata = float(ft[data.name()])
        fiduu = int(ft[fidu.name()])
        if abs(rdata - dumval) < 1e-6:
            # Dummy value: skip
            continue
        #
        stat.addVariant(ft[data.name()])
        # how to handle QgsMultiPoint ???
        if (the_layer.wkbType() == QgsWkbTypes.MultiPoint or
                the_layer.wkbType() == QgsWkbTypes.MultiPointM or
                the_layer.wkbType() == QgsWkbTypes.MultiPointZ or
                the_layer.wkbType() == QgsWkbTypes.MultiPointZM or
                the_layer.wkbType() == QgsWkbTypes.MultiPoint25D):
            # Suppose they all have the same attributes:
            # in this case it seems useless to get more than the first point, but...
            points = ft.geometry().constGet().clone()
        else:
            points = [ft.geometry().constGet().clone()]
        try:
            for point in points:
                xyzf.append([point.x(), point.y(), fiduu, rdata])
                nL += 1
        except:
            # NOTE(review): broad except silently drops non-iterable
            # geometries — consider narrowing to TypeError.
            pass
    # last line
    if xyzf != []:
        lines.append([lineN, nL])
        the_csv = os.path.join(self.tmpDir, '%s.csv' % str(lineN))
        with codecs.open(the_csv, 'w', 'utf-8') as fo:
            fo.write('X,Y,FID,Data\n')
            for ar in xyzf:
                fo.write(','.join(map(str, ar)))
                fo.write('\n')
        le = sqrt((xyzf[0][0] - xyzf[-1][0])**2 +
                  (xyzf[0][1] - xyzf[-1][1])**2)
        if le > TL:
            TL = le
    #
    stat.finalize()
    # Scale factor maps the full data range onto the longest line length.
    self.dmean = stat.mean()
    self.mult = TL / (stat.max() - stat.min())
    #
    if bCHscal:
        # Scaling field: retrieve its stats
        scch_fld = self.parameterAsString(parameters, self.SCALCH, context)
        scch = scally.fields().at(scally.fields().lookupField(scch_fld))
        scch_ix = scally.fields().lookupField(scch_fld)
        scch_f = scally.getFeatures(
            QgsFeatureRequest().setSubsetOfAttributes([scch_ix]),
            QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)
        stat = QgsStatisticalSummary()
        for current, ft in enumerate(scch_f):
            stat.addVariant(ft[scch.name()])
        stat.finalize()
        # Overrides the statistics computed from the data channel above.
        self.dmean = stat.mean()
        self.mult = TL / (stat.max() - stat.min())
    #
    # iv flips the profile polarity when the user asked for inversion.
    if invP:
        iv = -1
    else:
        iv = 1
    # Profile
    total = 40.0 / (len(lines) + 1)
    # For each line:
    for current, z in enumerate(lines):
        line = z[0]
        if feedback.isCanceled():
            break
        # NOTE(review): `ft` here is a leftover from the earlier feature
        # loop, not a feature of this loop — this check looks unintended;
        # confirm against the original plugin.
        if not ft.hasGeometry():
            continue
        feedback.setProgress(int(current * total) + 60.)
        # Read line back from csv
        the_csv = os.path.join(self.tmpDir, '%s.csv' % str(line))
        if not os.path.exists(the_csv):
            raise ValueError(
                'It seems parameters are swaped: LINE <-> DATA!')
        ar = pd.read_csv(the_csv)
        # Order points along the line by fiducial number.
        ar = ar.sort_values('FID')
        # Create the profile
        px, py = self._do_profile(ar, iv, scale, offset)
        # Construct vector layer
        f = QgsFeature()
        typeL = str(self.type)
        azimut = float(self.azimut)
        Len = float(self.length)
        CLen = float(self.clength)
        f.setAttributes(
            [str(line), typeL, int(len(px)), azimut, Len, CLen])
        # M value carries the raw data value at each profile vertex.
        line_pts = [
            QgsPoint(x, y, m=m) for x, y, m in zip(px, py, ar.Data)
        ]
        if join_to_line:
            # Join profile to its line
            e = len(ar) - 1
            ar0 = [QgsPoint(ar.X[0], ar.Y[0], m=0.)]
            ar1 = [QgsPoint(ar.X[e], ar.Y[e], m=0.)]
            line_pts = ar0 + line_pts + ar1
        #
        f.setGeometry(QgsGeometry(QgsLineString(line_pts)))
        sink.addFeature(f, QgsFeatureSink.FastInsert)
        # Delete temp csv file
        try:
            os.remove(the_csv)
        except:
            # Best-effort cleanup; a stale temp file is harmless.
            pass
    return {self.OUTPUT: dest_id}