class Activiteit(es.DocType):
    """Elasticsearch mapping for an activity ("activiteit").

    Free-text fields are analyzed with the Dutch analyzer; identifier-like
    fields are stored not_analyzed for exact matching.
    """
    ext_id = es.String(index='not_analyzed')           # id in the source system
    naam = es.String(analyzer=dutch_analyzer)          # name
    beschrijving = es.String(analyzer=dutch_analyzer)  # description
    bron_link = es.String(index='not_analyzed')        # link to the source
    tijdstip = es.String(index='not_analyzed')         # stored as raw string
    tags = es.String(index='not_analyzed')
    centroid = es.GeoPoint()
    # Embedded location object; its properties mirror the Locatie mapping
    # field-for-field.
    # NOTE(review): `Locatie` must be defined before this class is evaluated;
    # in the order shown here it appears *after* Activiteit — confirm the
    # actual module order.
    locatie = es.Object(doc_class=Locatie, properties={
        'ext_id': es.String(index='not_analyzed'),
        'naam': es.String(analyzer=dutch_analyzer),
        'centroid': es.GeoPoint(),
        'openbare_ruimte_naam': es.String(index='not_analyzed'),
        'huisnummer': es.String(index='not_analyzed'),
        'huisnummer_toevoeging': es.String(index='not_analyzed'),
        'postcode': es.String(index='not_analyzed')
    })
class Locatie(es.DocType):
    """Elasticsearch mapping for a location ("locatie").

    Address parts are kept not_analyzed for exact filtering; only the name
    goes through the Dutch analyzer.
    """
    ext_id = es.String(index='not_analyzed')    # id in the source system
    naam = es.String(analyzer=dutch_analyzer)   # name
    centroid = es.GeoPoint()
    openbare_ruimte_naam = es.String(index='not_analyzed')  # street name
    huisnummer = es.String(index='not_analyzed')            # house number
    huisnummer_toevoeging = es.String(index='not_analyzed') # number suffix
    postcode = es.String(index='not_analyzed')
class Gebied(es.DocType):
    """
    A searchable area ("gebied"):

        Unesco
        Buurt
        Buurtcombinatie
        Stadsdeel
        Grootstedelijk
        Gemeente
        Woonplaats
    """
    id = es.Keyword()
    _display = es.Keyword()  # preformatted display string

    naam = es.Text(analyzer=analyzers.adres, fields=text_fields)
    naam_nen = es.Text(analyzer=analyzers.adres, fields=text_fields)  # NEN variant
    naam_ptt = es.Text(analyzer=analyzers.adres, fields=text_fields)  # PTT variant
    postcode = es.Text(analyzer=analyzers.postcode, fields=postcode_fields)

    # Area code, autocomplete-indexed but searched with the standard analyzer.
    g_code = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields={
            'keyword': es.Keyword(),
            'ngram': es.Text(analyzer=analyzers.autocomplete),
        })

    # gebied ordering
    order = es.Integer()
    subtype = es.Keyword()
    type = es.Keyword()

    centroid = es.GeoPoint()

    landelijk_id = es.Text(  # Only for openbare ruimte
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero)
        })

    gsg_type = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_GEBIED']
class GeoCoding(PluginBase):
    """Plugin that attempts to attach geographic data to a tweet."""

    data_schema = {
        'geotagged': es.Boolean(),
        'location': es.Object(Location),
        'coordinates': es.GeoPoint(),
    }

    def __init__(self, *args, **kwargs) -> None:
        """Configure the Carmen geotagger, then delegate to super."""
        with warnings.catch_warnings():
            # Carmen's default setup emits a handful of warnings; silence
            # them just for the duration of the resolver construction.
            warnings.simplefilter("ignore")
            options = {'place': {'allow_unknown_locations': True}}
            self.geotagger = get_resolver(options=options)
            self.geotagger.load_locations()
        self.location_resolver = LocationEncoder()
        super().__init__(*args, **kwargs)  # type: ignore

    def process_tweet(self, tweet_json: Dict[str, Any]) -> Dict[str, Any]:
        """
        Attempt to geotag the tweet data.

        Returns the tweet enriched with location data when any could be
        resolved; ``geotagged`` is set according to success or failure.
        """
        LOG.debug('Attempting to geotag tweet')
        resolved = self.geotagger.resolve_tweet(tweet_json['_raw'])
        tweet_json['geotagged'] = False
        if resolved:
            LOG.debug(' This tweet includes location information')
            location = self.location_resolver.default(resolved[1])
            tweet_json['location'] = location
            # Only emit coordinates (and mark success) when both axes exist.
            if 'latitude' in location and 'longitude' in location:
                tweet_json['coordinates'] = {
                    'lat': location['latitude'],
                    'lon': location['longitude'],
                }
                tweet_json['geotagged'] = True
        LOG.debug('Geotagging completed!')
        return tweet_json
class DataDocType(es.Document):
    """Document model backing the Elasticsearch test-suite."""

    # Person identity
    first_name = es.Keyword()
    last_name = es.Keyword()
    birthday = es.Date()

    # Profile details
    city = es.Text()
    skills = es.Keyword()
    description = es.Text()
    location = es.GeoPoint()

    # Status
    is_active = es.Boolean()
    score = es.Integer()

    class Index:
        name = 'test'
class KadastraalObject(es.DocType): aanduiding = es.Text( fielddata=True, analyzer=analyzers.postcode, fields=kad_text_fields) # The search aanduiding is the aanduiding without the "acd00 " prefix # remove this in future short_aanduiding = es.Text( analyzer=analyzers.kad_obj_aanduiding, search_analyzer='standard', fields=kad_text_fields) sectie = es.Text( fields=kad_text_fields, ) objectnummer = es.Text( analyzer=analyzers.autocomplete, search_analyzer='standard', fields=kad_int_fields, ) indexletter = es.Keyword( fields=kad_text_fields, ) indexnummer = es.Text( analyzer=analyzers.autocomplete, search_analyzer='standard', fields=kad_int_fields ) order = es.Integer() centroid = es.GeoPoint() gemeente = es.Text(analyzer=analyzers.naam) gemeente_code = es.Keyword(normalizer=analyzers.lowercase) subtype = es.Keyword() _display = es.Keyword() class Index: name = settings.ELASTIC_INDICES['BRK_OBJECT']
class InfoRiegoRecord(dsl.DocType):
    """Weather-station measurement record stored in the 'inforiego' index.

    One document per reading: station code/location, the timestamp, and the
    measured meteorological values.
    """
    code = dsl.String()            # station code
    location = dsl.String()        # station location
    date = dsl.Date()              # reading timestamp
    rain = dsl.Float()
    temperature = dsl.Float()
    rel_humidity = dsl.Float()
    radiation = dsl.Float()
    wind_speed = dsl.Float()
    wind_direction = dsl.Float()
    lat_lon = dsl.GeoPoint(lat_lon=True)
    station_height = dsl.Integer()

    # NOTE: a previous `save` override merely delegated to the parent with
    # unchanged arguments (useless-super-delegation); it was removed — the
    # inherited DocType.save behaves identically.

    class Meta:
        index = 'inforiego'
class sigpac_record(dsl.DocType):
    """SIGPAC parcel record (doc_type 'sigpac' in the 'plots' index)."""

    # Parcel identifiers
    dn_pk = dsl.Long()
    provincia = dsl.Integer()
    municipio = dsl.Integer()
    poligono = dsl.Integer()
    parcela = dsl.Integer()
    recinto = dsl.Integer()
    zona = dsl.Integer()
    agregado = dsl.Integer()
    dn_oid = dsl.Long()

    # Geometry and terrain
    perimetro = dsl.Long()
    superficie = dsl.Long()
    pend_med = dsl.Integer()
    points = dsl.GeoShape()
    bbox = dsl.GeoShape()
    bbox_center = dsl.GeoPoint(lat_lon=True)
    elevation = dsl.Float()

    # Usage / capability attributes
    uso_sigpac = dsl.String()
    cap_auto = dsl.Integer()
    cap_manual = dsl.Integer()
    coef_regadio = dsl.Float()

    # Reference strings
    c_refpar = dsl.String()
    c_refpol = dsl.String()
    c_refrec = dsl.String()

    def save(self, **kwargs):
        """Persist this record via the parent DocType implementation."""
        return super(sigpac_record, self).save(**kwargs)

    class Meta:
        index = 'plots'
        doc_type = 'sigpac'
class Job(es.DocType):
    """Elasticsearch mapping and helpers for a crawled job offer.

    Class attributes define custom French / technology analyzers used by the
    text fields below; instance properties expose formatted views of the
    stored data.
    """

    class Meta:
        index = 'jobs'
        doc_type = 'job-offer'

    # --- Analysis chain -------------------------------------------------
    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu',
                                         'lorsqu', 'puisqu'
                                     ])
    french_stopwords = es.token_filter('french_stopwords',
                                       type='stop',
                                       stopwords='_french_')
    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter('french_keywords',
                                      type='keyword_marker',
                                      keywords=[])
    french_stemmer = es.token_filter('french_stemmer',
                                     type='stemmer',
                                     language='light_french')
    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip'])

    # Splits comma/space-separated technology lists into tokens.
    technologies_tokenizer = es.tokenizer('comma_tokenizer',
                                          type='pattern',
                                          pattern=' |,|, ')
    # Normalizes spellings such as "c++"/"cpp" and "c#"/"csharp".
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language',
            'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language',
            'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language',
            'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet',
            '.net => dotnet'
        ])
    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=['lowercase', 'asciifolding', technologies_synonyms_filter])
    company_name_analyzer = es.analyzer('company_name_analyzer',
                                        tokenizer='standard',
                                        filter=['lowercase', 'asciifolding'])

    # --- Fields ---------------------------------------------------------
    id = es.Integer()
    url = es.String(index='no')
    source = es.String(index='not_analyzed')
    title = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})
    description = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})
    company = es.String(analyzer=company_name_analyzer)
    company_url = es.String(index='no')
    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()
    tags = es.Nested(doc_class=Tag,
                     properties=dict(tag=es.String(index='not_analyzed'),
                                     weight=es.Integer()))
    publication_datetime = es.Date()
    publication_datetime_is_fake = es.Boolean()
    crawl_datetime = es.Date()
    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        # Recompute the concrete index name (e.g. adding an environment
        # suffix) after the regular DocType init.
        super(Job, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Name of the index this document lives in."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Mapping type name of this document."""
        return self._doc_type.name

    @property
    def published(self):
        """Publication date formatted for display (French locale)."""
        return format_date(self.publication_datetime, locale='FR_fr')

    @property
    def published_in_days(self):
        """Human-readable age of the offer in days."""
        # TODO: bugfix
        # NOTE(review): `published` formats with locale 'FR_fr' while this
        # uses 'en_US' — presumably the inconsistency is what the TODO above
        # refers to; confirm the intended locale before changing it.
        delta = datetime.now() - self.publication_datetime
        return format_timedelta(delta, granularity='day', locale='en_US')

    @property
    def alltags(self):
        """Tags of this offer that are NOT condition tags, as Tag2 objects."""
        # NOTE(review): `condition_tags` inside this method body resolves to
        # a module-level collection, not to the property of the same name
        # below — confirm that shadowing is intentional.
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] not in condition_tags:
                    tags.append(Tag2(tag['tag'], tag['weight']))
        return tags

    @property
    def condition_tags(self):
        """Condition tags of this offer as Tag2 objects with CSS info."""
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] in condition_tags:  # module-level collection
                    tag = Tag2(tag['tag'], tag['weight'],
                               Tag2.get_css(tag['tag']))
                    tags.append(tag)
        return tags
class Company(es.DocType):
    """Elasticsearch mapping and helpers for a company profile.

    Shares the French / technology analysis chain used by the Job mapping.
    """

    class Meta:
        index = 'companies'
        doc_type = 'company'

    # --- Analysis chain -------------------------------------------------
    french_elision = es.token_filter(
        'french_elision',
        type='elision',
        articles_case=True,
        articles=[
            'l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu',
            'lorsqu', 'puisqu'
        ]
    )
    french_stopwords = es.token_filter(
        'french_stopwords',
        type='stop',
        stopwords='_french_'
    )
    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter(
        'french_keywords',
        type='keyword_marker',
        keywords=[]
    )
    french_stemmer = es.token_filter(
        'french_stemmer',
        type='stemmer',
        language='light_french'
    )
    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip']
    )

    # Splits comma/space-separated technology lists into tokens.
    technologies_tokenizer = es.tokenizer(
        'comma_tokenizer',
        type='pattern',
        pattern=' |,|, '
    )
    # Normalizes spellings such as "c++"/"cpp" and "c#"/"csharp".
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language',
            'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language',
            'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language',
            'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet',
            '.net => dotnet'
        ]
    )
    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=[
            'lowercase',
            'asciifolding',
            technologies_synonyms_filter
        ]
    )
    company_name_analyzer = es.analyzer(
        'company_name_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding'
        ]
    )

    # --- Fields ---------------------------------------------------------
    id = es.String(index='no')
    name = es.String(analyzer=french_analyzer)
    description = es.String(
        analyzer=french_analyzer,
        fields={
            'technologies': es.String(analyzer=technologies_analyzer)
        }
    )
    technologies = es.String(analyzer=technologies_analyzer)
    url = es.String(index='no')
    logo_url = es.String(index='no')
    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()
    email = es.String(index='no')
    phone = es.String(index='no')
    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        # Recompute the concrete index name (e.g. adding an environment
        # suffix) after the regular DocType init.
        super(Company, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Name of the index this document lives in."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Mapping type name of this document."""
        return self._doc_type.name
class Geocomplete(es.DocType):
    """Elasticsearch mapping for geo-autocomplete entries.

    Names and postal codes are indexed with edge-n-gram analyzers so that
    prefix queries can drive an autocomplete UI.
    """

    class Meta:
        index = 'geocomplete'
        doc_type = 'geoloc-entry'

    # --- Analysis chain -------------------------------------------------
    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu',
                                         'lorsqu', 'puisqu'
                                     ])
    # Front edge-n-grams (1..50 chars) for prefix matching on names.
    geocompletion_ngram_filter = es.token_filter('geocompletion_ngram',
                                                 type='edgeNGram',
                                                 min_gram=1,
                                                 max_gram=50,
                                                 side='front')
    # Replaces spaces with dashes so multi-word towns stay one token.
    town_filter = es.token_filter('town_filter',
                                  type='pattern_replace',
                                  pattern=' ',
                                  replacement='-')
    geocompletion_index_tokenizer = es.tokenizer(
        'geocompletion_index_tokenizer',
        type='pattern',
        pattern='@')
    geocompletion_index_analyzer = es.analyzer(
        'geocompletion_index_analyzer',
        type='custom',
        tokenizer=geocompletion_index_tokenizer,
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            town_filter,
            geocompletion_ngram_filter
        ])
    # Search side omits the n-gram filter so queries match as typed.
    geocompletion_search_analyzer = es.analyzer(
        'geocompletion_search_analyzer',
        type='custom',
        tokenizer=geocompletion_index_tokenizer,
        filter=['lowercase', 'asciifolding', town_filter, french_elision])

    # --- Fields ---------------------------------------------------------
    name = es.String(index='analyzed',
                     analyzer=geocompletion_index_analyzer,
                     search_analyzer=geocompletion_search_analyzer,
                     fields=dict(raw=es.String(index='not_analyzed')))
    complement = es.String(index='not_analyzed')

    # Shorter edge-n-grams (1..5) suffice for postal codes.
    postal_code_ngram_filter = es.token_filter('postal_code_ngram',
                                               type='edgeNGram',
                                               min_gram=1,
                                               max_gram=5,
                                               side='front')
    postal_code_index_analyzer = es.analyzer('postal_code_index_analyzer',
                                             type='custom',
                                             tokenizer='standard',
                                             filter=[postal_code_ngram_filter])
    postal_code_search_analyzer = es.analyzer('postal_code_search_analyzer',
                                              type='custom',
                                              tokenizer='standard')
    postal_code = es.String(index='analyzed',
                            analyzer=postal_code_index_analyzer,
                            search_analyzer=postal_code_search_analyzer,
                            fields=dict(raw=es.String(index='not_analyzed')))

    geolocation = es.GeoPoint()
    weight = es.Float()  # ranking weight for autocomplete results

    def __init__(self, meta=None, **kwargs):
        super(Geocomplete, self).__init__(meta, **kwargs)
        # NOTE(review): this is a substring-membership test, not equality;
        # sibling classes assign `compute_index_name(...)` unconditionally.
        # Looks like it may have been meant as `==` (or `!=`) — confirm
        # intent before relying on it.
        if self.index in compute_index_name(self.index):
            self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Name of the index this document lives in."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Mapping type name of this document."""
        return self._doc_type.name
class Inschrijving(es.DocType):
    """Elasticsearch mapping for a Handelsregister (HR) registration.

    Number-like identifiers get autocomplete n-grams plus a 'raw' keyword
    and a 'nozero' variant (leading zeros stripped) for flexible matching.
    """
    _display = es.Keyword()       # preformatted display string
    _kvk_display = es.Keyword()
    doctype = es.Keyword()

    kvk_nummer = es.Text(analyzer=analyzers.autocomplete, fields={
        'raw': es.Keyword(),
        'nozero': es.Text(analyzer=analyzers.nozero)
    })
    vestigingsnummer = es.Text(analyzer=analyzers.autocomplete, fields={
        'raw': es.Keyword(),
        'nozero': es.Text(analyzer=analyzers.nozero),
        'int': es.Integer()
    })
    hoofdvestiging = es.Boolean()  # True when this is the main establishment

    # SBI activity codes with their descriptions.
    sbi = es.Nested(
        properties={
            'code': es.Text(analyzer=analyzers.autocomplete,
                            fields={'raw': es.Keyword()}),
            'omschrijving': es.Text(),
        })

    naam = es.Text(analyzer=analyzers.adres, fields={
        'raw': es.Keyword(),
        'ngram': es.Text(analyzer=analyzers.autocomplete,
                         search_analyzer='standard')
    })
    # Trade names; same sub-field layout as `naam`.
    handelsnamen = es.Nested(
        properties={
            'naam': es.Text(analyzer=analyzers.adres, fields={
                'raw': es.Keyword(),
                'ngram': es.Text(analyzer=analyzers.autocomplete,
                                 search_analyzer='standard')
            })
        })

    postadres = es.Text(analyzer=analyzers.adres, fields={
        'raw': es.Keyword(),
        'ngram': es.Text(analyzer=analyzers.autocomplete,
                         search_analyzer='standard')
    })
    bezoekadres = es.Text(analyzer=analyzers.adres, fields={
        'raw': es.Keyword(),
        'ngram': es.Text(analyzer=analyzers.autocomplete,
                         search_analyzer='standard')
    })
    bezoekadres_correctie = es.Boolean()

    # hoofdvestiging
    centroid = es.GeoPoint()

    class Index:
        name = settings.ELASTIC_INDICES['HR']
class Nummeraanduiding(es.Document):
    """
    Elastic doc for all meta of a nummeraanduiding.
    Used in the dataselectie portal

    The link with any data that is being used here is the bag_id.
    """
    nummeraanduiding_id = es.Keyword()
    landelijk_id = es.Keyword()
    _openbare_ruimte_naam = es.Keyword()
    naam = es.Keyword()
    huisnummer = es.Integer()
    huisnummer_toevoeging = es.Keyword()
    huisletter = es.Keyword()
    postcode = es.Keyword()
    woonplaats = es.Keyword()

    buurt_code = es.Keyword()
    buurt_naam = es.Keyword()
    buurtcombinatie_code = es.Keyword()
    buurtcombinatie_naam = es.Keyword()
    ggw_code = es.Keyword()
    ggw_naam = es.Keyword()
    gsg_naam = es.Keyword()
    stadsdeel_code = es.Keyword()
    stadsdeel_naam = es.Keyword()

    # Extended information
    centroid = es.GeoPoint()
    status = es.Keyword()
    type_desc = es.Keyword()
    type_adres = es.Keyword()

    # Landelijke codes
    openbare_ruimte_landelijk_id = es.Keyword()
    verblijfsobject = es.Keyword()
    ligplaats = es.Keyword()
    standplaats = es.Keyword()

    # Verblijfsobject specific data
    gebruiksdoel = es.Keyword(index=False, multi=True)  # stored, not searchable
    gebruiksdoel_woonfunctie = es.Keyword()
    gebruiksdoel_gezondheidszorgfunctie = es.Keyword()
    geconstateerd = es.Keyword()
    in_onderzoek = es.Keyword()
    aantal_eenheden_complex = es.Integer()
    aantal_kamers = es.Integer()
    toegang = es.Keyword(index=False, multi=True)       # stored, not searchable
    verdieping_toegang = es.Integer()
    bouwlagen = es.Integer()
    hoogste_bouwlaag = es.Integer()
    laagste_bouwlaag = es.Integer()
    oppervlakte = es.Integer()
    bouwblok = es.Keyword()
    gebruik = es.Keyword()
    eigendomsverhouding = es.Keyword()

    # Only for CSV
    panden = es.Keyword()  # id values
    pandnaam = es.Keyword()
    bouwjaar = es.Keyword()
    type_woonobject = es.Keyword()
    ligging = es.Keyword()

    class Meta:
        doc_type = 'nummeraanduiding'

    class Index:
        doc_type = 'nummeraanduiding'
        name = settings.ELASTIC_INDICES['DS_BAG_INDEX']
class Inschrijving(es.Document):
    """
    Elastic data of 'vestigingen' or 'mac' from handelsregister
    """
    maatschappelijke_activiteit_id = es.Keyword()
    vestiging_id = es.Keyword()
    dataset = es.Keyword()

    kvk_nummer = es.Keyword()
    handelsnaam = es.Keyword()
    datum_aanvang = es.Date()
    eigenaar_naam = es.Keyword()
    eigenaar_id = es.Keyword()
    non_mailing = es.Boolean()
    aantal_werkzame_personen = es.Integer()
    rechtsvorm = es.Keyword()

    # Address information
    bezoekadres_volledig_adres = es.Keyword()
    bezoekadres_correctie = es.Boolean()
    bezoekadres_afgeschermd = es.Boolean()
    bezoekadres_openbare_ruimte = es.Keyword()
    bezoekadres_huisnummer = es.Integer()
    bezoekadres_huisletter = es.Keyword()
    bezoekadres_huisnummertoevoeging = es.Keyword()
    bezoekadres_postcode = es.Keyword()
    bezoekadres_plaats = es.Keyword()

    bezoekadres_buurt_code = es.Keyword()
    bezoekadres_buurt_naam = es.Keyword()
    bezoekadres_buurtcombinatie_code = es.Keyword()
    bezoekadres_buurtcombinatie_naam = es.Keyword()
    bezoekadres_ggw_code = es.Keyword()
    bezoekadres_ggw_naam = es.Keyword()
    bezoekadres_gsg_naam = es.Keyword()
    bezoekadres_stadsdeel_code = es.Keyword()
    bezoekadres_stadsdeel_naam = es.Keyword()

    postadres_volledig_adres = es.Keyword()
    postadres_correctie = es.Boolean()
    postadres_afgeschermd = es.Boolean()
    postadres_openbare_ruimte = es.Keyword()
    postadres_huisnummer = es.Integer()
    postadres_huisletter = es.Keyword()
    postadres_huisnummertoevoeging = es.Keyword()
    postadres_postcode = es.Keyword()
    postadres_plaats = es.Keyword()

    # And the bag numid
    bag_numid = es.Keyword()
    # NOTE(review): chained assignment — both names share ONE Keyword field
    # instance; presumably `identificatie` is an alias for
    # `adresseerbaar_object_id`. Confirm this is intentional.
    adresseerbaar_object_id = identificatie = es.Keyword()
    centroid = es.GeoPoint()

    # Categories
    hoofdcategorie = es.Keyword(multi=True)
    subcategorie = es.Keyword(multi=True)

    # SBI codes
    sbi_code = es.Text(
        multi=True,
        fielddata=True,
        analyzer=autocomplete,
    )
    sbi_omschrijving = es.Keyword(multi=True)
    sbi_l1 = es.Keyword(multi=True)
    sbi_l2 = es.Keyword(multi=True)
    sbi_l3 = es.Keyword(multi=True)
    sbi_l4 = es.Keyword(multi=True)
    sbi_l5 = es.Keyword(multi=True)

    # bijzondere rechtstoestand
    # status = es.Keyword()
    bijzondere_rechtstoestand = es.Keyword()

    class Meta:
        # Disable the catch-all _all field.
        all = es.MetaField(enabled=False)
        doc_type = 'vestiging'

    class Index:
        doc_type = 'vestiging'
        name = settings.ELASTIC_INDICES['DS_HR_INDEX']