class IPEnrichment(Enrichment):
    """Enrichment document for an IP address.

    Declares the fields of the ``passivetotal_ip_enrichment`` doc type and
    copies every key of the incoming payload onto the instance.
    """

    class Meta:
        doc_type = "passivetotal_ip_enrichment"

    network = String()
    autonomousSystemName = String()
    autonomousSystemNumber = Integer()
    country = String()
    sinkhole = Boolean()
    location = GeoPoint()

    # def __setattr__(self, key, value):
    #     if key == "network":
    #         value = convert_cidr(value)
    #     super(IPEnrichment, self).__setattr__(key, value)

    def __init__(self, jdata=None):
        """Build the document from a payload dict.

        Args:
            jdata: mapping of field name to value. ``None`` (the default)
                is treated as an empty payload.
        """
        # A literal ``{}`` default would be created once and shared by every
        # call; use a sentinel and build a fresh dict per instance instead.
        if jdata is None:
            jdata = {}
        Enrichment.__init__(self, jdata)
        for key, value in jdata.items():
            # Missing values are stored as an empty dict rather than None.
            if value is None:
                value = {}
            setattr(self, key, value)
class DomainFeaturesDoc(GenericDoc):
    """Field declarations for the ``opendns_domain_features`` doc type."""

    class Meta:
        doc_type = "opendns_domain_features"

    age = Integer()

    # TTL statistics.
    ttls_min = Integer()
    ttls_max = Integer()
    ttls_mean = Float()
    ttls_median = Float()
    ttls_stddev = Float()

    # Country / ASN / prefix values, each paired with a count field.
    country_codes = String()
    country_count = Integer()
    asns = Integer()
    asns_count = Integer()
    prefixes = String()
    prefix_count = Integer()
    rips = Integer()
    div_rips = Float()

    # Geographic values.
    locations = GeoPoint()
    locations_count = Integer()
    geo_distance_sum = Float()
    geo_distance_mean = Float()

    # Boolean flags and remaining attributes.
    non_routable = Boolean()
    mail_exchanger = Boolean()
    cname = Boolean()
    ff_candidate = Boolean()
    rips_stability = Float()
    base_domain = String()
    is_subdomain = Boolean()
class StationStatus(DocType):
    """
    Station status document stored in the ``station_status`` index.

    Sample source record:

    {
      "station_id":"72",
      "num_bikes_available":11,
      "num_bikes_disabled":1,
      "num_docks_available":27,
      "num_docks_disabled":0,
      "is_installed":1,
      "is_renting":1,
      "is_returning":1,
      "last_reported":1478989087,
      "eightd_has_available_keys":false
    }
    """

    station_id = Text()
    location = GeoPoint()
    num_bikes_available = Integer()
    num_docks_available = Integer()
    # NOTE(review): the sample record spells this key "num_docks_disabled",
    # and also carries "num_bikes_disabled", which has no field here.
    num_docs_disabled = Integer()
    is_installed = Integer()
    is_renting = Integer()
    is_returning = Integer()
    last_reported = Date()
    # NOTE(review): the sample record has "eightd_has_available_keys" rather
    # than this name - confirm which key the indexing code actually writes.
    eightd_has_key_dispenser = Boolean()
    station_name = Text(analyzer='snowball')

    class Meta:
        index = 'station_status'
class WeiboTweetDoc(Document):
    """Tweet document stored in the ``weibo_tweets`` index.

    Free-text fields use the ``ik_max_word`` analyzer for both indexing and
    search, and expose a ``key`` keyword sub-field.
    """

    id = Keyword()
    url = Text(fields={'key': Keyword()})
    uid = Keyword()
    content = Text(analyzer="ik_max_word", search_analyzer="ik_max_word", fields={'key': Keyword()})
    likes_count = Integer()
    retweets_count = Integer()
    replies_count = Integer()
    client = Text(analyzer="ik_max_word", search_analyzer="ik_max_word", fields={
        'key': Keyword(),
    })
    image_urls = Text()
    video_urls = Text()
    location = Text(analyzer="ik_max_word", search_analyzer="ik_max_word", fields={
        'key': Keyword(),
    })
    geo = GeoPoint()
    is_origin = Boolean()
    origin_tweet = Text()
    tags = Text(analyzer="ik_max_word", search_analyzer="ik_max_word", fields={
        'key': Keyword(),
    })
    created_at = Date()

    class Index:
        name = "weibo_tweets"
class StationInformation(DocType):
    """
    Station information document stored in the ``stations`` index.

    Sample source record:

    {
        u'capacity': 39,
        u'name': u'W 52 St & 11 Ave',
        u'short_name': u'6926.01',
        u'lon': -73.99392888,
        u'lat': 40.76727216,
        u'station_id': u'72',
        u'rental_methods': [u'KEY', u'CREDITCARD'],
        u'eightd_has_key_dispenser': False,
        u'region_id': 71
    },
    """

    capacity = Integer()
    name = Text(analyzer='snowball')
    short_name = Text(analyzer='snowball')
    location = GeoPoint()
    station_id = Text(analyzer='snowball')
    # TODO can rental_methods be an array? Maybe nested?
    rental_methods = Text()  # [u'KEY', u'CREDITCARD'],
    eightd_has_key_dispenser = Boolean()
    region_id = Integer()
    status_date = Date()

    class Meta:
        index = 'stations'
    # The unterminated triple-quote that used to follow this class was
    # removed: it opened a string literal that was never closed and
    # swallowed the definitions after it.
class CarDocumentDSlBaseField(DocType):
    """Document for the ``Car`` model that adds a geo-point ``position`` field."""

    # Declared explicitly, in addition to the model fields listed in Meta.
    position = GeoPoint()

    class Meta:
        model = Car
        index = 'car_index'
        fields = ['name', 'price']
class RapidNHEvent(DocType):
    """Event document whose index and doc type come from ``settings.ES_INDICES['rapid']``.

    The mapping is declared ``strict``.
    """

    class Meta:
        index = settings.ES_INDICES['rapid']['name']
        # Uses the second entry of the configured document list.
        doc_type = settings.ES_INDICES['rapid']['documents'][1]['name']
        dynamic = MetaField('strict')

    event_date = Date()
    created_date = Date()
    title = Text(analyzer='english')
    event_type = String(fields={'_exact': Keyword()})
    location_description = Text(analyzer='english', fields={'_exact': Keyword()})
    location = GeoPoint()
    main_image_url = String(fields={'_exact': Keyword()})
    main_image_uuid = String(fields={'_exact': Keyword()})
    # Each dataset entry carries id/title/doi/url, all with an `_exact`
    # keyword sub-field.
    datasets = Nested(
        properties={
            "id": String(fields={'_exact': Keyword()}),
            "title": String(fields={'_exact': Keyword()}),
            "doi": String(fields={'_exact': Keyword()}),
            "url": String(fields={'_exact': Keyword()}),
        })

    def save(self, **kwargs):
        """Delegate to the parent ``save`` and return its result."""
        # self.created_date = datetime.utcnow()
        return super(RapidNHEvent, self).save(**kwargs)
class EducatorDocument(Document):
    """Educator information, stored in the ``educator_educator`` index."""

    id = Keyword()
    location = GeoPoint()
    categories = Keyword()
    contributing_to_courses = Keyword()
    rating = ScaledFloat(scaling_factor=100)
    active = Boolean()
    show_in_listings = Boolean()
    local_connect = Boolean()
    online_connect = Boolean()
    short_bio = Text(required=False)
    created_at = Date()
    updated_at = Date()

    class Index:
        name = 'educator_educator'

    def save(self, **kwargs):
        """Persist the document, using its ``id`` field as the document id.

        The ``id`` attribute is moved into ``meta.id`` and removed from the
        body. ``created_at`` is stamped only when it is not already set,
        ``updated_at`` is refreshed, and ``active`` is forced to ``True``.

        Args:
            **kwargs: forwarded unchanged to the parent ``save``.

        Returns:
            Whatever the parent ``save`` returns.
        """
        # After a previous save (or when loaded from the index) the body no
        # longer carries ``id``; only move it when present so that meta.id is
        # not overwritten with None and ``del`` does not fail.
        doc_id = getattr(self, 'id', None)
        if doc_id is not None:
            self.meta.id = doc_id
            del self.id

        # One timestamp so created_at == updated_at on first save.
        now = datetime.now()
        if not getattr(self, 'created_at', None):
            self.created_at = now
        self.updated_at = now
        self.active = True
        return super().save(**kwargs)

    def update(self, **kwargs):
        """Partially update the document, refreshing ``updated_at``.

        A caller-supplied ``updated_at`` takes precedence over the automatic
        timestamp instead of raising a duplicate-keyword ``TypeError``.
        """
        kwargs.setdefault('updated_at', datetime.now())
        return super().update(**kwargs)
class Indicator(Document):
    """Indicator document; the index name is the pattern ``indicators-*``."""

    class Index:
        name = 'indicators-*'

    # The indicator value, plus typed IPv4/IPv6 forms with their masks.
    indicator = Keyword()
    indicator_ipv4 = Ip()
    indicator_ipv4_mask = Integer()
    indicator_ipv6 = Ip()
    indicator_ipv6_mask = Integer()

    group = Keyword()
    itype = Keyword()
    tlp = Keyword()
    provider = Keyword()
    portlist = Text()
    asn = Float()
    asn_desc = Text()
    cc = Text(fields={'raw': Keyword()})
    protocol = Text(fields={'raw': Keyword()})
    confidence = Integer()
    timezone = Text()
    city = Text(fields={'raw': Keyword()})
    description = Keyword()
    tags = Keyword(multi=True, fields={'raw': Keyword()})
    rdata = Keyword()
    count = Integer()

    # Location is kept both as a geo-point and as separate coordinates.
    location = GeoPoint()
    region = Keyword()
    latitude = Float()
    longitude = Float()

    ns = Keyword()
    mx = Keyword()

    # Timestamps.
    reported_at = Date()
    last_at = Date()
    first_at = Date()
    created_at = Date()
def main():
    """Download the VADER lexicon, save the index mapping, and stream tweets.

    The mapping declares ``centroid`` as a geo-point and ``timestamp_ms`` as
    a date. Streaming is filtered by the UK bounding box; a
    ``ConnectionError`` raised while streaming is logged rather than
    propagated.
    """
    nltk.download('vader_lexicon')

    # Prepare index mappings
    index_mapping = Mapping(DOC_TYPE)
    for field_name, field_type in (('centroid', GeoPoint()),
                                   ('timestamp_ms', Date())):
        index_mapping.field(field_name, field_type)
    index_mapping.save(TARGET_INDEX)

    # Select bounding box here: http://boundingbox.klokantech.com
    bounding_boxes = {
        'mk_ltn_nham': '-1.0282,51.8575,-0.3249,52.2864',  # Milton Keynes + Luton + N'hampton
        'uk': '-11.21,50.08,1.56,58.98',  # UK
        'us_can': '-126.95,24.7,-59.68,50.01',  # US + Canada
        'eu_nafr': '-30.2,26.5,52.9,71.0',  # Europe + north africa
    }

    # Keywords are expressed as a comma-separated list
    terms = 'gdpr'

    try:
        # API Documentation
        # https://developer.twitter.com/en/docs/tweets/filter-realtime/api-reference/post-statuses-filter
        streaming_api = TweetStreamer(CONSUMER_KEY, CONSUMER_SECRET,
                                      ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

        # Disclaimer 1: Twitter Streaming API cannot filter by terms AND location!
        # Disclaimer 2: The API returns an incredibly small subset of tweets...
        # streaming_api.statuses.filter(track=terms)
        streaming_api.statuses.filter(locations=bounding_boxes['uk'])
    except ConnectionError as err:
        LOGGER.error('Connection error! %s', err)
class EventPlace(DocType):
    """Place document (doc type ``places``) stored in the ``place-index`` index."""

    # Identifiers and provenance.
    place_id = Text(fields={'raw': Keyword()})
    provider_id = Text(fields={'raw': Keyword()})
    provider = Text()
    date_added = Date()
    slug = String(fields={'raw': Keyword()})

    # Text fields analysed with the ``russian`` analyzer.
    title = Text(fields={'raw': Keyword()}, analyzer='russian')
    address = Text(fields={'raw': Keyword()}, analyzer='russian')
    city = Text(fields={'raw': Keyword()}, analyzer='russian')
    description = Text()

    # Coordinates, both as separate floats and as a geo-point.
    lat = Float()
    lng = Float()
    geometry = GeoPoint()

    # Contact details.
    email = Text()
    website = Text()
    phone = Text()

    class Meta:
        doc_type = 'places'
        index = 'place-index'

    def save(self, **kwargs):
        """Stamp ``date_added`` if it is still empty, then save via the parent."""
        already_stamped = bool(self.date_added)
        if not already_stamped:
            self.date_added = datetime.now()
        parent = super(EventPlace, self)
        return parent.save(**kwargs)
class StudentDocument(Document):
    """Student information, stored in the ``student_student`` index."""

    id = Keyword()
    location = GeoPoint()
    categories = Keyword()
    courses = Keyword()
    active = Boolean()
    show_in_listings = Boolean()
    local_connect = Boolean()
    online_connect = Boolean()
    short_bio = Text(required=False)
    created_at = Date()
    updated_at = Date()

    class Index:
        name = 'student_student'

    def save(self, **kwargs):
        """Persist the document, using its ``id`` field as the document id.

        The ``id`` attribute is moved into ``meta.id`` and removed from the
        body. ``created_at`` is stamped only when it is not already set,
        ``updated_at`` is refreshed, and ``active`` is forced to ``True``.

        Args:
            **kwargs: forwarded unchanged to the parent ``save``.

        Returns:
            Whatever the parent ``save`` returns.
        """
        # After a previous save (or when loaded from the index) the body no
        # longer carries ``id``; only move it when present so that meta.id is
        # not overwritten with None and ``del`` does not fail.
        doc_id = getattr(self, 'id', None)
        if doc_id is not None:
            self.meta.id = doc_id
            del self.id

        # One timestamp so created_at == updated_at on first save.
        now = datetime.now()
        if not getattr(self, 'created_at', None):
            self.created_at = now
        self.updated_at = now
        self.active = True
        return super().save(**kwargs)

    def update(self, **kwargs):
        """Partially update the document, refreshing ``updated_at``.

        A caller-supplied ``updated_at`` takes precedence over the automatic
        timestamp instead of raising a duplicate-keyword ``TypeError``.
        """
        kwargs.setdefault('updated_at', datetime.now())
        return super().update(**kwargs)
class Record(DocType):
    """Listing record stored in the ``imobiliare`` index."""

    # Source and headline data.
    url = Text()
    title = Text(fields={'raw': Keyword()})
    address = Text(fields={'raw': Keyword()})
    location = GeoPoint()
    contract_type = Keyword()

    # Long text, run through the ``html_strip`` analyzer.
    description = Text(analyzer=html_strip)
    extra = Text(analyzer=html_strip)

    # Building attributes.
    building_type = Text(fields={'raw': Keyword()})
    structure_materials = Text(fields={'raw': Keyword()})
    agency_broker = Text(fields={'raw': Keyword()})
    compartiment = Keyword()
    num_of_rooms = Integer()
    num_of_kitchens = Integer()
    num_of_bathrooms = Integer()
    built_year = Integer()
    floor = Keyword()
    build_surface_area = Integer()
    usable_surface_area = Integer()
    price = Integer()
    height_category = Keyword()
    currency = Keyword()

    # Timestamps.
    created_at = Date()
    added_at = Date()

    class Meta:
        index = 'imobiliare'

    def save(self, **kwargs):
        """Stamp ``created_at`` with the current time and save via the parent.

        NOTE(review): ``created_at`` is rewritten on every call and
        ``added_at`` is never set here - confirm that is intended.
        """
        stamp = datetime.now()
        self.created_at = stamp
        return super().save(**kwargs)
class Indicator(DocType):
    """Indicator document: field declarations only, no index configured here."""

    # The indicator value, plus IPv4/IPv6 forms with their masks.
    indicator = Keyword()
    indicator_ipv4 = Ip()
    indicator_ipv4_mask = Integer()
    # Unlike the IPv4 form, the IPv6 form is declared as a Keyword.
    indicator_ipv6 = Keyword()
    indicator_ipv6_mask = Integer()

    group = Keyword()
    itype = Keyword()
    tlp = Keyword()
    provider = Keyword()
    portlist = Text()
    asn = Float()
    asn_desc = Text()
    cc = Text(fields={'raw': Keyword()})
    protocol = Text(fields={'raw': Keyword()})

    # Timestamps.
    reporttime = Date()
    lasttime = Date()
    firsttime = Date()

    confidence = Integer()
    timezone = Text()
    city = Text(fields={'raw': Keyword()})
    description = Keyword()
    tags = Keyword(multi=True, fields={'raw': Keyword()})
    rdata = Keyword()
    count = Integer()
    message = Text(multi=True)
    location = GeoPoint()
class ParentDocument(Document):
    """Parent information"""

    id = Keyword()
    first_name = Text(fields={'keyword': Keyword()})
    last_name = Text(fields={'keyword': Keyword()})
    email = Text(fields={'keyword': Keyword()})
    location = GeoPoint()
    categories = Keyword()
    achievements = Keyword()
    rating = ScaledFloat(scaling_factor=100)
    active = Boolean()
    show_in_listings = Boolean()
    local_connect = Boolean()
    online_connect = Boolean()
    short_bio = Text(required=False)
    created_at = Date()
    updated_at = Date()

    class Index:
        # NOTE(review): a *parent* document stored under an "educator" index
        # name looks like a copy-paste - confirm before relying on it. Left
        # unchanged because renaming would orphan existing data.
        name = 'educator_educator'

    def save(self, **kwargs):
        """Persist the document, using its ``id`` field as the document id.

        The ``id`` attribute is moved into ``meta.id`` and removed from the
        body. ``created_at`` is stamped only when it is not already set,
        ``updated_at`` is refreshed, and ``active`` is forced to ``True``.

        Args:
            **kwargs: forwarded unchanged to the parent ``save``.

        Returns:
            Whatever the parent ``save`` returns.
        """
        # After a previous save (or when loaded from the index) the body no
        # longer carries ``id``; only move it when present so that meta.id is
        # not overwritten with None and ``del`` does not fail.
        doc_id = getattr(self, 'id', None)
        if doc_id is not None:
            self.meta.id = doc_id
            del self.id

        # One timestamp so created_at == updated_at on first save.
        now = datetime.now()
        if not getattr(self, 'created_at', None):
            self.created_at = now
        self.updated_at = now
        self.active = True
        return super().save(**kwargs)

    def update(self, **kwargs):
        """Partially update the document, refreshing ``updated_at``.

        A caller-supplied ``updated_at`` takes precedence over the automatic
        timestamp instead of raising a duplicate-keyword ``TypeError``.
        """
        kwargs.setdefault('updated_at', datetime.now())
        return super().update(**kwargs)
class SDGeo(Document):
    """Document structure for the sd_geo index"""

    location = GeoPoint()
    # Must be an instance: the bare ``Integer`` class is not a field
    # declaration, so ``number`` would be left out of the mapping.
    number = Integer()
    street = Text()
    latitude = Float()
    longitude = Float()
class Entity(Document):
    """Location entity stored in the ``rb_locations`` index (2 shards).

    Holds descriptive attributes, coordinates, and a nested list of
    ``Person`` objects.
    """

    id = Integer()
    name = Keyword()
    cityId = Keyword()
    wardId = Integer()
    level = Integer()
    menuId = Keyword()
    rb_pin = GeoPoint()
    lat = Float()
    lng = Float()
    total = Integer()
    type = Keyword()
    data = Text()
    wardName = Keyword()
    cityName = Keyword()
    address = Keyword()
    jurisdiction = Text()
    icon = Keyword()
    impact = Integer()
    category = Keyword()
    subcategory = Keyword()
    closed_at = Date()
    closed_by = Keyword()
    phone = Keyword()
    created_by = Keyword()
    creator_org = Keyword()
    persons = Nested(Person)

    class Index:
        name = 'rb_locations'
        settings = {"number_of_shards": 2}

    def mprint(self):
        """Print this entity's attributes, then each nested person."""
        summary = (
            self.meta.id, self.name, self.cityId, self.wardId, self.level,
            self.menuId, self.lat, self.lng, self.total, self.type,
            self.data, self.wardName, self.cityName, self.address,
            self.jurisdiction, self.impact, self.category, self.subcategory,
            self.closed_at, self.closed_by, self.phone, self.created_by,
            self.creator_org,
        )
        print("Org: ", *summary)
        for member in self.persons:
            member.mprint()

    def __eq__(self, other):
        """Compare two entities field by field.

        Scalar fields must match exactly, ``lat``/``lng`` must agree within
        1e-8 (``other``'s coordinates are converted with ``float``), and the
        nested persons must be equal pairwise and in the same order.
        ``rb_pin`` is not part of the comparison.
        """
        # Same fields, in the same order, as the original comparison chain.
        exact_fields = (
            'id', 'name', 'cityId', 'wardId', 'level', 'menuId', 'total',
            'type', 'data', 'wardName', 'cityName', 'address',
            'jurisdiction', 'icon', 'impact', 'category', 'subcategory',
            'closed_by', 'closed_at', 'phone', 'created_by', 'creator_org',
        )
        for field_name in exact_fields:
            if getattr(self, field_name) != getattr(other, field_name):
                return False

        tolerance = math.pow(10, -8)
        if abs(self.lat - float(other.lat)) > tolerance:
            return False
        if abs(self.lng - float(other.lng)) > tolerance:
            return False

        if len(self.persons) != len(other.persons):
            return False
        for idx, member in enumerate(self.persons):
            if not member == other.persons[idx]:
                return False
        return True
class CarDocumentDSlBaseField(DocType):
    """Document for the ``Car`` model that adds a geo-point ``position`` field."""

    # Declared explicitly, in addition to the model fields listed below.
    position = GeoPoint()

    class Django:
        model = Car
        fields = ['name', 'price']

    class Index:
        name = 'car_index'
class CarDocumentDSlBaseField(Document):
    """Document for the ``Car`` model that adds a geo-point ``position`` field."""

    # Declared explicitly, in addition to the model fields listed below.
    position = GeoPoint()

    class Django:
        model = Car
        fields = ["name", "price"]

    class Index:
        name = "car_index"
class IndividualEncounter(InnerDoc):
    """Inner document describing a single encounter.

    ``id``, ``submitter_id`` and ``has_annotation`` are required; ``point``
    and ``sex`` are explicitly optional.
    """

    id = UUIDField(required=True)
    point = GeoPoint(required=False)
    animate_status = Keyword()
    sex = EnumField(Sex, required=False)
    submitter_id = Keyword(required=True)
    date_occurred = Date()
    taxonomy = Keyword()
    has_annotation = Boolean(required=True)
class Gym(DocType):
    """Gym document with an analysed name and a geo-point location."""

    name = Text(analyzer='snowball', fields={'raw': Keyword()})
    location = GeoPoint()

    # The index name 'gym' is declared twice, via ``Meta`` and via ``Index``.
    # NOTE(review): presumably kept for compatibility between library
    # versions - confirm which one is actually read.
    class Meta:
        index = 'gym'

    class Index:
        name = 'gym'
class Listing(Document):
    """Base class containing the common fields.

    Stored in the ``scrapy_airbnb_listing`` index.
    """

    access = Text()
    additional_house_rules = Text()
    allows_events = Boolean()
    amenities = Keyword(multi=True)
    amenity_ids = Keyword(multi=True)
    bathrooms = Float()
    bedrooms = Integer()
    beds = Integer()
    business_travel_ready = Boolean()
    city = Text(fields={'keyword': Keyword()}, required=True)
    country = Text(fields={'keyword': Keyword()}, required=True)
    country_code = Text(fields={'keyword': Keyword()}, required=True)
    coordinates = GeoPoint()
    description = Text()
    host_id = Integer(fields={'keyword': Keyword()})
    house_rules = Text()
    interaction = Text()
    is_hotel = Boolean()
    max_nights = Integer()
    min_nights = Integer()
    monthly_price_factor = Float()
    name = Text(fields={'keyword': Keyword()}, required=True)
    neighborhood_overview = Text()
    notes = Text()
    person_capacity = Integer()
    photo_count = Integer()
    photos = Keyword(multi=True)
    place_id = Text(fields={'keyword': Keyword()})
    price_rate = Float()
    price_rate_type = Text(fields={'keyword': Keyword()}, required=True)
    province = Text(fields={'keyword': Keyword()})
    # Rating fields are declared as integers in this mapping.
    rating_accuracy = Integer()
    rating_checkin = Integer()
    rating_cleanliness = Integer()
    rating_communication = Integer()
    rating_location = Integer()
    rating_value = Integer()
    review_count = Integer()
    review_score = Integer()
    room_and_property_type = Text(fields={'keyword': Keyword()}, required=True)
    room_type = Text(fields={'keyword': Keyword()}, required=True)
    satisfaction_guest = Float()
    star_rating = Float()
    state = Text(fields={'keyword': Keyword()}, required=True)
    state_short = Text(fields={'keyword': Keyword()}, required=True)
    summary = Text()
    transit = Text()
    url = Text(fields={'keyword': Keyword()}, required=True)
    weekly_price_factor = Float()

    class Index:
        name = 'scrapy_airbnb_listing'

    def save(self, **kwargs):
        """Delegate to the parent ``save`` and return its result."""
        return super(Listing, self).save(**kwargs)
class PropertyIndex(DocType):
    """Property document stored in the index named by ``ES_INDEX``."""

    property_name = Text()
    city = Text()
    location = GeoPoint()
    price = Integer()
    no_of_bedrooms = Integer()
    no_of_bathrooms = Integer()

    class Meta:
        index = ES_INDEX
        # NOTE(review): confirm the library reads a ``type`` option here;
        # it may expect ``doc_type`` instead.
        type = 'doc'
class Place(Entity):
    """Place document stored in the ``place`` index (1 shard, 0 replicas)."""

    location = GeoPoint()
    desc = Text()
    region = Text()
    country = Text()

    class Index:
        name = 'place'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0
        }
class ZiroomDoc(Document):
    """Rental listing document (doc type ``ziroom``) stored in the ``hrent`` index."""

    # Price
    price = Integer()
    # City
    city = Text()
    # Address
    address = Text()
    # Details
    detail = Text()
    # Latitude / longitude
    location = GeoPoint()
    # Title
    title = Text()
    # Layout (room configuration)
    house_type = Text()
    # Floor area
    area = Float()
    # Orientation
    orientation = Text()
    # Floor
    floor = Integer()
    # Top floor
    top_floor = Integer()
    # Tags
    tags = Text(multi=True)
    # Images
    image_urls = Text(multi=True)
    # Furnishings / facilities
    deploy = Object()
    # url
    url = Text()
    # Transport
    traffic = Text(multi=True)
    # Roommates
    chums = Object(multi=True)
    # Payment methods
    payment = Object(multi=True)
    # Recommendations
    recommend = Object(multi=True)
    # Activities / promotions
    activity = Object(multi=True)
    # Air quality test
    air_part = Object()
    # Video
    vr_video = Object()

    class Index:
        name = 'hrent'

    class Meta:
        doc_type = 'ziroom'
class Event(InnerDoc): """ Meetup Group Event """ # required fields meetup_id = Text(required=True) time = Date(required=True) name = Text(required=True) link = Text(required=True) date_in_series_pattern = Boolean(required=True) # optional fields attendance_count = Integer() attendance_sample = Integer() attendee_sample = Integer() created = Date() description = Text() duration = Long() fee_accepts = Text() fee_amount = Integer() fee_currency = Text() fee_description = Text() fee_label = Text() how_to_find_us = Text() status = Text() updated = Date() utc_offset = Long() venue_visibility = Text() visibility = Text() # venue venue_address_1 = Text() venue_address_2 = Text() venue_address_3 = Text() venue_city = Text() venue_country = Text() venue_localized_country_name = Text() venue_name = Text() venue_phone = Text() venue_zip_code = Text() venue_location = GeoPoint() # event hosts event_host_host_count = Integer() event_host_id = Integer() event_host_intro = Text() event_host_join_date = Date() event_host_name = Text()
class Spam(DocType):
    """Spam e-mail document (doc type ``spam``) stored in ``default_index``.

    Field names mirror mail headers; most string fields are declared
    ``not_analyzed``.
    """

    # Envelope sender, broken down into its parts.
    X_Envelope_From = Object(
        properties = {
            'email': String(index='not_analyzed'),
            'header': String(index='not_analyzed'),
            'localpart': String(index='not_analyzed'),
            'domain': String(index='not_analyzed'),
            'location': GeoPoint(),
            'domain_type': String(index='not_analyzed'),
            'country_code' : String(index='not_analyzed')
        }
    )
    X_Envelope_To = String(index='not_analyzed')
    X_Spam_Flag = Boolean()
    X_Spam_Score = Float()
    To = String(multi=True, index='not_analyzed')
    # NOTE: this rebinds the name ``Date`` inside the class body to the field
    # instance; any later ``Date()`` call in this class body would call that
    # instance rather than the field class.
    Date = Date()
    From = String(index='not_analyzed')
    Reply_To = String(index='not_analyzed')
    X_Priority = Integer()
    #X_Mailer = String()
    MIME_Version = String(index='not_analyzed')
    Subject = String()
    Content_Transfer_Encoding = String(index='not_analyzed')
    Content_Type = String(index='not_analyzed')
    Charset = String(index='not_analyzed')
    Received = String(index='not_analyzed')
    Hops = Integer()
    Received_SPF = String(index = 'not_analyzed')
    DKIM_Signature = String(index = 'not_analyzed')

    ##### HEADERS ADDED AFTER PROCESSING ####
    spfResult = String(index = 'not_analyzed')
    spfTrue = String(index = 'not_analyzed')
    DKIM_Result = String(index = 'not_analyzed')
    DKIM_KeyLength = Integer()
    #############################################

    #Message = String()
    phoneNumbers = String(multi=True, index='not_analyzed')
    URLs = String(multi=True, index='not_analyzed')
    attachmentsTypes = String(multi=True, index='not_analyzed')
    attachmentsSizes = Integer(multi=True)

    class Meta:
        index = 'default_index'
        doc_type = 'spam'

    def save(self, ** kwargs):
        """Delegate to the parent ``save`` and return its result."""
        return super().save(** kwargs)
class House(DocType):
    """House document stored in the ``my_index`` index."""

    city = String()
    zip_code = String()
    address = String()
    price = Float()
    location = GeoPoint()
    created_at = Date()
    likes = Integer()

    class Meta:
        index = 'my_index'
        # NOTE(review): confirm the library reads a ``type`` option here;
        # it may expect ``doc_type`` instead.
        type = 'house'

    def save(self, **kwargs):
        """Delegate to the parent ``save`` and return its result."""
        return super(House, self).save(**kwargs)
class Listing(Document):
    """Base class containing the common fields.

    No index is configured on this class.
    """

    access = Text()
    additional_house_rules = Text()
    allows_events = Boolean()
    amenities = Keyword(multi=True)
    amenity_ids = Keyword(multi=True)
    avg_rating = Float()
    bathrooms = Float()
    bedrooms = Integer()
    beds = Integer()
    business_travel_ready = Boolean()
    city = Text(fields={'keyword': Keyword()}, required=True)
    country = Text(fields={'keyword': Keyword()}, required=True)
    coordinates = GeoPoint()
    description = Text()
    host_id = Integer(fields={'keyword': Keyword()})
    house_rules = Text()
    interaction = Text()
    is_hotel = Boolean()
    monthly_price_factor = Float()
    name = Text(fields={'keyword': Keyword()}, required=True)
    neighborhood_overview = Text()
    person_capacity = Integer()
    photo_count = Integer()
    photos = Keyword(multi=True)
    place_id = Text(fields={'keyword': Keyword()})
    price_rate = Float()
    price_rate_type = Text(fields={'keyword': Keyword()}, required=True)
    province = Text(fields={'keyword': Keyword()})
    # Rating fields are declared as floats in this mapping.
    rating_accuracy = Float()
    rating_checkin = Float()
    rating_cleanliness = Float()
    rating_communication = Float()
    rating_location = Float()
    rating_value = Float()
    review_count = Integer()
    # Nested field declared without an inner document class or properties.
    reviews = Nested()
    room_and_property_type = Text(fields={'keyword': Keyword()}, required=True)
    room_type = Text(fields={'keyword': Keyword()}, required=True)
    room_type_category = Text(fields={'keyword': Keyword()}, required=True)
    satisfaction_guest = Float()
    star_rating = Float()
    state = Text(fields={'keyword': Keyword()}, required=True)
    transit = Text()
    url = Text(fields={'keyword': Keyword()}, required=True)
    weekly_price_factor = Float()
class BOMStations(Document):
    """Station document stored in the ``INDEX_NAME["stations"]`` index.

    The index is configured with 2 shards and 1 replica.
    """

    id = Keyword(required=True)
    name = Keyword(required=True)
    state = Keyword()
    rainfall_districts = Keyword()
    date_opened = Date()
    coordinates = GeoPoint()
    last_update_date = Date()

    def save(self, **kwargs):
        """Persist the station, keyed by its ``id`` field.

        Copies ``id`` into ``meta.id`` and stamps ``last_update_date`` with
        the current time before saving.

        Args:
            **kwargs: forwarded unchanged to the parent ``save``.

        Returns:
            Whatever the parent ``save`` returns.
        """
        self.meta.id = self.id
        # Write to the declared field; the previous ``last_updated_date``
        # spelling targeted an attribute that is not declared on this class.
        self.last_update_date = datetime.now()
        return super().save(**kwargs)

    class Index:
        name = INDEX_NAME["stations"]
        settings = {"number_of_shards": 2, "number_of_replicas": 1}