def _schema2doc_map(self):
    """Return the mapping of table-schema type names to ES mapping objects.

    Every entry is wrapped in a ``CustomObject`` with two sub-fields:
    ``val`` (the typed value) and ``repr`` (a keyword copy for display).
    Temporal types carry a ``text`` sub-field plus a parsed ``Date``
    sub-field driven by the formats configured in constance.
    """
    # 'string' and 'any' share the same analyzed-text layout.
    analyzed_text = dict(
        analyzer=polish_analyzer,
        fields={
            'raw': dsl_field.Text(),
            'keyword': dsl_field.Keyword(),
        },
    )
    base = {
        'integer': dsl_field.Long(),
        'number': dsl_field.ScaledFloat(scaling_factor=100),
        'string': dsl_field.Text(**analyzed_text),
        'any': dsl_field.Text(**analyzed_text),
        'boolean': dsl_field.Boolean(),
        'time': dsl_field.Text(
            fields={
                'text': dsl_field.Text(),
                'time': dsl_field.Date(format=constance_config.TIME_FORMATS),
            }),
        'duration': dsl_field.DateRange(),
        'default': dsl_field.Text(),
        'date': dsl_field.Text(
            fields={
                'text': dsl_field.Text(),
                'date': dsl_field.Date(format=constance_config.DATE_FORMATS),
            }),
        # NOTE(review): 'datetime' parses with DATE_FORMATS, not a dedicated
        # datetime format list — confirm this is intentional.
        'datetime': dsl_field.Text(
            fields={
                'text': dsl_field.Text(),
                'datetime': dsl_field.Date(format=constance_config.DATE_FORMATS),
            }),
    }
    return {
        type_name: CustomObject(properties={
            'val': es_field,
            'repr': dsl_field.Keyword(),
        })
        for type_name, es_field in base.items()
    }
def test_scaled_float():
    """A scaling factor is mandatory and is reflected in the mapping dict."""
    # Constructing without a scaling factor must fail.
    with pytest.raises(TypeError):
        field.ScaledFloat()
    scaled = field.ScaledFloat(123)
    expected = {'scaling_factor': 123, 'type': 'scaled_float'}
    assert scaled.to_dict() == expected
# Configure the elasticsearch-dsl connection registry from Django settings.
es_connections = Connections()
es_connections.configure(**settings.ELASTICSEARCH_DSL)

# Publication states for resources: (value, translated label).
STATUS_CHOICES = [
    ('published', _('Published')),
    ('draft', _('Draft'))
]

# Content type -> openness score, built from SUPPORTED_CONTENT_TYPES tuples
# whose 2nd element is the type name and 4th is the score.
OPENNESS_SCORE = {_type: os for _, _type, _, os in settings.SUPPORTED_CONTENT_TYPES}

signal_logger = logging.getLogger('signals')

# Table-schema type name -> Elasticsearch field used when indexing rows.
# NOTE(review): 'integer' maps to Float() here, while other variants of this
# map in the codebase use Long() — confirm which is intended.
_schema2doc_map = {
    'integer': dsl_field.Float(),
    'number': dsl_field.ScaledFloat(scaling_factor=100),
    'string': dsl_field.Text(
        analyzer=polish_analyzer,
        fields={
            'raw': dsl_field.Text(),
        }
    ),
    'any': dsl_field.Text(
        analyzer=polish_analyzer,
        fields={
            'raw': dsl_field.Text(),
        }
    ),
    'boolean': dsl_field.Boolean(),
    'date': dsl_field.Date(),
    'datetime': dsl_field.Date(),
class ShpData(IndexedData):
    """Indexes the records of an ESRI shapefile resource into Elasticsearch.

    The shapefile is read lazily from the resource's archived main file;
    each record becomes one ES document holding the geometry (``shape``),
    a representative ``point``, and one ``colN`` field per dBASE column.
    """

    _type = 'geo'

    # dBASE field type code -> Elasticsearch field used for indexing.
    _schema2doc_map = {
        'C': dsl_field.Text(
            analyzer=polish_analyzer,
            fields={
                'raw': dsl_field.Text(),
                'keyword': dsl_field.Keyword(),
            },
        ),
        'D': dsl_field.Date(),
        'N': dsl_field.ScaledFloat(scaling_factor=100),
        'L': dsl_field.Boolean(),
        '@': dsl_field.Date(),
        'I': dsl_field.Long(),
        '+': dsl_field.Long(),
        'F': dsl_field.Float(),
        'O': dsl_field.Double(),
    }

    # dBASE field type code -> API serializer field class.
    _schema_to_api_field = {
        'C': api_fields.String,
        'D': api_fields.DateTime,
        'N': api_fields.Number,
        'L': api_fields.Boolean,
        '@': api_fields.DateTime,
        'I': api_fields.Number,
        '+': api_fields.Number,
        'F': api_fields.Number,
        'O': api_fields.Number,
    }

    # dBASE field type code -> human-readable table-schema type name.
    _schema_long_names = {
        'C': 'string',
        'D': 'datetime',
        'N': 'number',
        'L': 'boolean',
        '@': 'datetime',
        'I': 'integer',
        '+': 'integer',
        'F': 'number',
        'O': 'number',
    }

    # Lazily-populated caches (see ``source`` and ``schema`` properties).
    _source = None
    _schema = None
    _transformer = None

    def __init__(self, resource, from_table_index=False):
        super().__init__(resource)
        self.from_table_index = from_table_index

    @property
    def has_geo_data(self):
        # A shapefile always carries geometry.
        return True

    @property
    def is_chartable(self):
        """True when there is more than one column and at least one is numeric."""
        fields = self.schema
        return len(fields) > 1 and any(
            (field.type in ('N', 'I', '+', 'F', 'O') for field in fields))

    @property
    def source(self):
        """Lazily extract the archive and open the first ``.shp`` file found.

        Also initializes the coordinate ``_transformer`` from the extracted
        archive contents. The opened ``shapefile.Reader`` is cached.
        """
        if not self._source:
            with ArchiveReader(self.resource.main_file.path) as extracted:
                shp_path = next(
                    iter(f for f in extracted if f.endswith('.shp')))
                self._source = shapefile.Reader(shp_path)
                self._transformer = ShapeTransformer(extracted)
        return self._source

    def get_schema(self, **kwargs):
        """Return a table-schema dict for the shapefile's attribute columns.

        With ``use_aliases=True`` column names are replaced by their aliases
        from ``reversed_headers_map``.
        """
        use_aliases = kwargs.get('use_aliases', False)
        headers = self.reversed_headers_map
        return {
            'fields': [{
                'name': headers[item.name] if use_aliases else item.name,
                'type': self._schema_long_names[item.type],
                'format': 'default'
            } for item in self.schema]
        }

    @property
    def schema(self):
        """Cached list of ``DBSchemaField`` for the attribute columns."""
        if not self._schema:
            # fields[0] is skipped — presumably the dBASE deletion-flag
            # pseudo-field; confirm against the pyshp docs.
            self._schema = [
                DBSchemaField(*_f) for _f in self.source.fields[1:]
            ]
        return self._schema

    def prepare_doc(self):
        """Dynamically build the elasticsearch-dsl ``Document`` class.

        Fixed fields cover geometry and resource metadata; each schema column
        with a known type becomes ``colN``. The original column names are
        stashed in the mapping's ``_meta['headers']`` for later aliasing.
        """
        _fields = {
            'shape': dsl_field.GeoShape(),
            'point': dsl_field.GeoPoint(),
            'shape_type': dsl_field.Integer(),
            'label': dsl_field.Text(),
            'resource': dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(analyzer=polish_analyzer,
                                            fields={'raw': dsl_field.Keyword()})
                }),
            'updated_at': dsl_field.Date(),
            'row_no': dsl_field.Long()
        }
        _map = {}
        for idx, _f in enumerate(self.schema, 1):
            # Columns with an unmapped dBASE type are not indexed.
            if _f.type not in self._schema2doc_map:
                continue
            alias_name = _f.name
            field_name = f'col{idx}'
            _field = self._schema2doc_map[_f.type]
            _map[field_name] = alias_name
            _fields[field_name] = _field
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})
        doc = type(self.idx_name, (Document, ), _fields)
        # Preserve the col->original-name mapping inside the ES mapping meta.
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc

    def get_api_fields(self):
        """Return aliased-name -> API field instances for every column."""
        record_fields = {}
        for f in self.schema:
            field_name = self.reversed_headers_map[f.name]
            field_cls = self._schema_to_api_field[f.type]
            record_fields[field_name] = field_cls(is_tabular_data_field=True)
        return record_fields

    @staticmethod
    def _get_row_id(row):
        # Deterministic document id: uuid5 over the joined (truncated)
        # attribute values, so re-indexing the same row yields the same id.
        return str(
            uuid.uuid5(uuid.NAMESPACE_DNS,
                       '+|+'.join(str(i)[:10000] for i in row)))

    def _docs_iter(self, doc):
        """Yield one populated ``doc`` instance per shapefile record."""
        for row_no, sr in enumerate(self.source.shapeRecords(), 1):
            geojson = self._transformer.transform(sr.shape)
            v = {
                'shape': geojson,
                'updated_at': datetime.now(),
                'row_no': row_no,
                'resource': {
                    'id': self.resource.id,
                    'title': self.resource.title
                },
            }
            for i, val in enumerate(sr.record, 1):
                # Empty byte strings are stored as missing values.
                v[f'col{i}'] = val if val != b'' else None
            v['shape_type'] = sr.shape.shapeType
            v['point'] = median_point(geojson)
            tds = self.resource.tabular_data_schema
            # Optional label column configured in the resource's schema.
            if tds is not None and 'geo' in tds and 'label' in tds['geo']:
                v['label'] = sr.record[tds['geo']['label'].get('col_name')]
            d = doc(**v)
            d.meta.id = self._get_row_id(sr.record)
            yield d
def test_scaled_float():
    """Omitting the scaling factor raises; providing one shows up in to_dict."""
    with pytest.raises(TypeError):
        field.ScaledFloat()
    scaled = field.ScaledFloat(123)
    result = scaled.to_dict()
    assert result == {"scaling_factor": 123, "type": "scaled_float"}