def _schema2doc_map(self):
    """Build the mapping from schema type names to Elasticsearch fields.

    Every entry is a ``CustomObject`` with two properties: ``val``, the field
    that holds the typed value, and ``repr``, a ``Keyword`` field.

    :return: dict keyed by schema type name ('integer', 'number', 'string',
        'any', 'boolean', 'time', 'duration', 'default', 'date', 'datetime').
    """

    def polish_text():
        # Analyzed text with 'raw' and 'keyword' sub-fields. A fresh instance
        # is built on each call so 'string' and 'any' never share one object.
        return dsl_field.Text(
            analyzer=polish_analyzer,
            fields={
                'raw': dsl_field.Text(),
                'keyword': dsl_field.Keyword(),
            },
        )

    def text_with_date(subfield, date_format):
        # Text field with a plain 'text' sub-field plus a Date sub-field whose
        # name matches the schema type it is registered under.
        return dsl_field.Text(
            fields={
                'text': dsl_field.Text(),
                subfield: dsl_field.Date(format=date_format),
            },
        )

    value_fields = {
        'integer': dsl_field.Long(),
        'number': dsl_field.ScaledFloat(scaling_factor=100),
        'string': polish_text(),
        'any': polish_text(),
        'boolean': dsl_field.Boolean(),
        'time': text_with_date('time', constance_config.TIME_FORMATS),
        'duration': dsl_field.DateRange(),
        'default': dsl_field.Text(),
        'date': text_with_date('date', constance_config.DATE_FORMATS),
        'datetime': text_with_date('datetime', constance_config.DATE_FORMATS),
    }

    # Wrap each value field together with its 'repr' keyword companion.
    return {
        type_name: CustomObject(properties={
            'val': value_field,
            'repr': dsl_field.Keyword(),
        })
        for type_name, value_field in value_fields.items()
    }
Пример #2
0
def test_scaled_float():
    """ScaledFloat requires a scaling factor and reports it in ``to_dict``."""
    # Constructing the field without a scaling factor must be rejected.
    with pytest.raises(TypeError):
        field.ScaledFloat()

    expected_mapping = {'scaling_factor': 123, 'type': 'scaled_float'}
    scaled = field.ScaledFloat(123)
    assert scaled.to_dict() == expected_mapping
Пример #3
0
# Elasticsearch connection registry, configured from settings.ELASTICSEARCH_DSL.
es_connections = Connections()
es_connections.configure(**settings.ELASTICSEARCH_DSL)

# (stored value, label) pairs; labels are passed through `_`.
STATUS_CHOICES = [('published', _('Published')), ('draft', _('Draft'))]

# Second element of each SUPPORTED_CONTENT_TYPES 4-tuple -> its fourth element.
OPENNESS_SCORE = {
    content_type: score
    for _first, content_type, _third, score in settings.SUPPORTED_CONTENT_TYPES
}

signal_logger = logging.getLogger('signals')

_schema2doc_map = {
    'integer': dsl_field.Float(),
    'number': dsl_field.ScaledFloat(scaling_factor=100),
    'string': dsl_field.Text(
        analyzer=polish_analyzer,
        fields={
            'raw': dsl_field.Text(),
        }
    ),
    'any': dsl_field.Text(
        analyzer=polish_analyzer,
        fields={
            'raw': dsl_field.Text(),
        }
    ),
    'boolean': dsl_field.Boolean(),
    'date': dsl_field.Date(),
    'datetime': dsl_field.Date(),
class ShpData(IndexedData):
    """Indexed data read from a shapefile packed inside a resource's archive.

    Attribute columns of the shapefile are indexed as ``col1``, ``col2``, ...
    next to the geometry of each record; the original column names are kept
    in the mapping's ``_meta`` under ``headers``.
    """

    _type = 'geo'

    # Shapefile field type code -> Elasticsearch field used in the mapping.
    _schema2doc_map = {
        'C':
        dsl_field.Text(
            analyzer=polish_analyzer,
            fields={
                'raw': dsl_field.Text(),
                'keyword': dsl_field.Keyword(),
            },
        ),
        'D':
        dsl_field.Date(),
        'N':
        dsl_field.ScaledFloat(scaling_factor=100),
        'L':
        dsl_field.Boolean(),
        '@':
        dsl_field.Date(),
        'I':
        dsl_field.Long(),
        '+':
        dsl_field.Long(),
        'F':
        dsl_field.Float(),
        'O':
        dsl_field.Double(),
    }

    # Shapefile field type code -> API field class used by get_api_fields().
    _schema_to_api_field = {
        'C': api_fields.String,
        'D': api_fields.DateTime,
        'N': api_fields.Number,
        'L': api_fields.Boolean,
        '@': api_fields.DateTime,
        'I': api_fields.Number,
        '+': api_fields.Number,
        'F': api_fields.Number,
        'O': api_fields.Number,
    }

    # Shapefile field type code -> type name reported by get_schema().
    _schema_long_names = {
        'C': 'string',
        'D': 'datetime',
        'N': 'number',
        'L': 'boolean',
        '@': 'datetime',
        'I': 'integer',
        '+': 'integer',
        'F': 'number',
        'O': 'number',
    }

    # Lazily populated caches; None means "not loaded yet".
    _source = None
    _schema = None
    _transformer = None

    def __init__(self, resource, from_table_index=False):
        """Store the resource and the ``from_table_index`` flag."""
        super().__init__(resource)
        self.from_table_index = from_table_index

    @property
    def has_geo_data(self):
        """Always True for this data type."""
        return True

    @property
    def is_chartable(self):
        """True when there is more than one column and at least one of them
        has a numeric type code ('N', 'I', '+', 'F' or 'O')."""
        fields = self.schema
        return len(fields) > 1 and any(
            field.type in ('N', 'I', '+', 'F', 'O') for field in fields)

    @property
    def source(self):
        """Return the cached ``shapefile.Reader``, opening it on first access.

        The first ``.shp`` entry found in the resource's archive is opened and
        a ``ShapeTransformer`` is created from the same extracted file list.

        :raises StopIteration: when the archive contains no ``.shp`` file.
        """
        # Compare with None instead of relying on truthiness: a Reader has a
        # length, so a shapefile with zero records is falsy and would be
        # extracted and opened again on every access.
        if self._source is None:
            with ArchiveReader(self.resource.main_file.path) as extracted:
                shp_path = next(f for f in extracted if f.endswith('.shp'))
                reader = shapefile.Reader(shp_path)
                transformer = ShapeTransformer(extracted)
                # Publish both together so a failure while building the
                # transformer cannot leave a cached reader without one.
                self._source = reader
                self._transformer = transformer
        return self._source

    def get_schema(self, **kwargs):
        """Describe the columns as ``{'fields': [{name, type, format}, ...]}``.

        :param use_aliases: keyword argument (default False); when true the
            column names are looked up in ``self.reversed_headers_map``.
        :raises KeyError: for a column whose type code is not listed in
            ``_schema_long_names``.
        """
        use_aliases = kwargs.get('use_aliases', False)
        headers = self.reversed_headers_map
        return {
            'fields': [{
                'name': headers[item.name] if use_aliases else item.name,
                'type': self._schema_long_names[item.type],
                'format': 'default'
            } for item in self.schema]
        }

    @property
    def schema(self):
        """Return the cached list of ``DBSchemaField`` column descriptors.

        The first entry of the reader's ``fields`` is skipped.
        """
        # `is None` so that an empty column list is cached as well.
        if self._schema is None:
            self._schema = [
                DBSchemaField(*_f) for _f in self.source.fields[1:]
            ]
        return self._schema

    def prepare_doc(self):
        """Build the ``Document`` subclass used to index this resource's rows.

        Besides the fixed geo/resource fields, one ``col<N>`` field is added
        for every column whose type code is listed in ``_schema2doc_map``
        (N is the 1-based column position); other columns are skipped. The
        ``col<N>`` -> original column name mapping is stored in the mapping's
        ``_meta`` under ``headers``.

        :return: a new ``Document`` subclass named ``self.idx_name``.
        """
        _fields = {
            'shape':
            dsl_field.GeoShape(),
            'point':
            dsl_field.GeoPoint(),
            'shape_type':
            dsl_field.Integer(),
            'label':
            dsl_field.Text(),
            'resource':
            dsl_field.Nested(
                properties={
                    'id':
                    dsl_field.Integer(),
                    'title':
                    dsl_field.Text(analyzer=polish_analyzer,
                                   fields={'raw': dsl_field.Keyword()})
                }),
            'updated_at':
            dsl_field.Date(),
            'row_no':
            dsl_field.Long()
        }
        _map = {}

        for idx, _f in enumerate(self.schema, 1):
            if _f.type not in self._schema2doc_map:
                continue
            field_name = f'col{idx}'
            _map[field_name] = _f.name
            _fields[field_name] = self._schema2doc_map[_f.type]

        # Set once, outside the loop, so the document is bound to the index
        # name even when no column has a supported type.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc

    def get_api_fields(self):
        """Return ``{aliased column name: API field instance}`` for all columns.

        :raises KeyError: for a column whose type code is not listed in
            ``_schema_to_api_field`` or whose name is missing from
            ``self.reversed_headers_map``.
        """
        # NOTE(review): prepare_doc() silently skips unsupported type codes
        # while this method raises KeyError for them - confirm that is intended.
        record_fields = {}
        for f in self.schema:
            field_name = self.reversed_headers_map[f.name]
            field_cls = self._schema_to_api_field[f.type]
            record_fields[field_name] = field_cls(is_tabular_data_field=True)
        return record_fields

    @staticmethod
    def _get_row_id(row):
        """Return a deterministic document id for ``row``.

        The id is the UUIDv5 (DNS namespace) of the row's values, each
        converted to ``str`` and cut to 10000 characters, joined with '+|+'.
        Rows with identical values therefore get the same id.
        """
        return str(
            uuid.uuid5(uuid.NAMESPACE_DNS,
                       '+|+'.join(str(i)[:10000] for i in row)))

    def _docs_iter(self, doc):
        """Yield one ``doc`` instance per shape record of the source.

        :param doc: document class (as built by ``prepare_doc``) that is
            instantiated with the record's values as keyword arguments.
        """
        # The label column does not depend on the record, so resolve it once.
        tds = self.resource.tabular_data_schema
        has_label = tds is not None and 'geo' in tds and 'label' in tds['geo']
        label_col = tds['geo']['label'].get('col_name') if has_label else None

        # Reading self.source also initialises self._transformer.
        for row_no, sr in enumerate(self.source.shapeRecords(), 1):
            geojson = self._transformer.transform(sr.shape)
            v = {
                'shape': geojson,
                'updated_at': datetime.now(),
                'row_no': row_no,
                'resource': {
                    'id': self.resource.id,
                    'title': self.resource.title
                },
            }
            for i, val in enumerate(sr.record, 1):
                # Empty byte strings are stored as missing values.
                v[f'col{i}'] = val if val != b'' else None

            v['shape_type'] = sr.shape.shapeType
            v['point'] = median_point(geojson)
            if has_label:
                v['label'] = sr.record[label_col]
            d = doc(**v)
            d.meta.id = self._get_row_id(sr.record)
            yield d
Пример #5
0
def test_scaled_float():
    """ScaledFloat requires a scaling factor and reports it in ``to_dict``."""
    # Omitting the scaling factor must raise.
    with pytest.raises(TypeError):
        field.ScaledFloat()

    serialized = field.ScaledFloat(123).to_dict()
    expected = {"scaling_factor": 123, "type": "scaled_float"}
    assert serialized == expected