class IndexedFile(DocType):
    """Elasticsearch mapping for a file entry, with exact-match and path-analyzed sub-fields."""

    # Free-text name; '_exact' sub-field stores an untokenized Keyword copy.
    name = Text(fields={'_exact': Keyword()})
    # Path indexed three ways: exact keyword plus two path-analyzed variants.
    path = Text(fields={
        '_exact': Keyword(),
        '_path': Text(analyzer=path_analyzer),
        '_path_real': Text(analyzer=path_analyzer)
    })
    lastModified = Date()
    length = Long()
    format = String()
    mimeType = Keyword()
    type = String()
    system = String(fields={'_exact': Keyword()})
    systemId = String()
    # Free-form nested metadata blob — schema not visible here.
    dsMeta = Nested()
    # Per-user permission entries: recursive flag plus read/write/execute booleans.
    permissions = Nested(properties={
        'username': Keyword(),
        'recursive': Boolean(),
        'permission': Nested(properties={
            'read': Boolean(),
            'write': Boolean(),
            'execute': Boolean()
        })
    })
    uuid = Keyword()

    class Meta:
        # Index and doc_type names come from Django settings;
        # 'strict' makes ES reject documents with unmapped fields.
        index = settings.ES_INDICES['files']['name']
        doc_type = settings.ES_INDICES['files']['documents'][0]['name']
        dynamic = MetaField('strict')
class Model(DocType):
    """Document with an id/name pair and a nested list of OtherModel summaries."""

    id = Integer()
    name = String(fields={'sub': String()})
    other_models = Nested(
        doc_class=OtherModel,
        multi=True,
        properties={
            'id': Integer(),
            'name': String(fields={
                'raw': String(index='not_analyzed'),
                'sub': String()
            })
        })

    class Meta:
        index = 'models'

    def get_term_query(self, column_name, value, default_op='should'):
        """Build a 'match' query for *column_name*, defaulting to the 'name' field."""
        field = 'name' if column_name is None else column_name
        match_query = ElasticSearchMixin.get_match_query(value, default_op)
        return {'match': {field: match_query}}
class Node(PicasoDocType):
    """Document describing a node record."""

    # NOTE(review): declaring `_id` as a mapped field shadows Elasticsearch's
    # document-id meta field — confirm this is intended.
    _id = String()
    owner = String()
    description = String()

    class Meta:
        index = 'nodes'
def test_mapping_can_collect_multiple_analyzers():
    """Analyzers used directly and inside multi-fields all show up in _collect_analysis()."""
    stop_ab = analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])
    stop_cd = analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])
    trigram_tok = analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3)
    keyword_analyzer = analysis.analyzer(
        'my_analyzer1', tokenizer='keyword', filter=['lowercase', stop_ab],
    )
    trigram_analyzer = analysis.analyzer(
        'my_analyzer2', tokenizer=trigram_tok, filter=[stop_cd],
    )

    m = mapping.Mapping('article')
    m.field('title', 'string', analyzer=keyword_analyzer,
            index_analyzer=keyword_analyzer, search_analyzer=trigram_analyzer)
    m.field('text', 'string', analyzer=keyword_analyzer, fields={
        'english': String(index_analyzer=keyword_analyzer),
        'unknown': String(index_analyzer=keyword_analyzer, search_analyzer=trigram_analyzer),
    })

    expected = {
        'analyzer': {
            'my_analyzer1': {'filter': ['lowercase', 'my_filter1'], 'tokenizer': 'keyword', 'type': 'custom'},
            'my_analyzer2': {'filter': ['my_filter2'], 'tokenizer': 'trigram', 'type': 'custom'},
        },
        'filter': {
            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'},
        },
        'tokenizer': {'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'}},
    }
    assert m._collect_analysis() == expected
def test_mapping_can_collect_all_analyzers():
    """Analyzers on multi-fields and nested properties are collected; analyzers
    without a custom definition ('english', 'unknown_custom') contribute nothing."""
    custom_one = analysis.analyzer(
        'my_analyzer1',
        tokenizer='keyword',
        filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
    )
    builtin_english = analysis.analyzer('english')
    nameless_custom = analysis.analyzer('unknown_custom')
    custom_two = analysis.analyzer(
        'my_analyzer2',
        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
    )

    m = mapping.Mapping('article')
    m.field('title', 'string', analyzer=custom_one, fields={
        'english': String(index_analyzer=builtin_english),
        'unknown': String(search_analyzer=nameless_custom),
    })
    m.field('comments', Nested(properties={
        'author': String(index_analyzer=custom_two)
    }))

    expected = {
        'analyzer': {
            'my_analyzer1': {'filter': ['lowercase', 'my_filter1'], 'tokenizer': 'keyword', 'type': 'custom'},
            'my_analyzer2': {'filter': ['my_filter2'], 'tokenizer': 'trigram', 'type': 'custom'},
        },
        'filter': {
            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'},
        },
        'tokenizer': {
            'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
        },
    }
    assert m._collect_analysis() == expected
class DomainFeaturesDoc(GenericDoc):
    """OpenDNS per-domain feature vector (TTL, geo and routing statistics)."""

    class Meta:
        doc_type = "opendns_domain_features"

    age = Integer()
    # TTL statistics across the domain's records.
    ttls_min = Integer()
    ttls_max = Integer()
    ttls_mean = Float()
    ttls_median = Float()
    ttls_stddev = Float()
    # Geographic / network distribution.
    country_codes = String()
    country_count = Integer()
    asns = Integer()
    asns_count = Integer()
    prefixes = String()
    prefix_count = Integer()
    rips = Integer()
    div_rips = Float()
    locations = GeoPoint()
    locations_count = Integer()
    geo_distance_sum = Float()
    geo_distance_mean = Float()
    non_routable = Boolean()
    mail_exchanger = Boolean()
    cname = Boolean()
    # presumably "fast-flux candidate" — confirm against the OpenDNS API docs
    ff_candidate = Boolean()
    rips_stability = Float()
    base_domain = String()
    is_subdomain = Boolean()
class SensorLog(DocType):
    """One sensor reading plus denormalized calendar parts for fast queries/aggs."""

    sensor_name = String(index='not_analyzed')
    temperature = Float()
    moisture = Integer()
    light = Integer()
    timestamp = Date()

    # Calendar parts derived from `timestamp` during save().
    day_of_week = String(index='not_analyzed')  # Mon-Fri
    day_of_year = Integer()   # 1-366
    day_of_month = Integer()  # 1-31
    week_of_year = Integer()  # 1-53
    month = Integer()         # 1-12 === Jan-Dec
    year = Integer()
    hour = Integer()
    minute = Integer()

    class Meta:
        index = 'sensor_log'

    def save(self, **kwargs):
        """Populate the derived calendar fields from `timestamp`, then persist."""
        ts = self.timestamp
        self.day_of_week = WEEKDAYS[ts.weekday()]
        self.day_of_year = ts.timetuple().tm_yday
        self.day_of_month = ts.day
        self.week_of_year = ts.isocalendar()[1]
        self.month = ts.month
        self.year = ts.year
        self.hour = ts.hour
        self.minute = ts.minute
        return super(SensorLog, self).save(**kwargs)

    def is_published(self):
        # NOTE(review): `published_from` is not declared on this class, so this
        # raises AttributeError; looks copied from the elasticsearch-dsl example.
        return datetime.now() < self.published_from
class DomainSecurityInfoDoc(GenericDoc):
    """OpenDNS security-info scores and flags for a domain."""

    class Meta:
        doc_type = "opendns_security_info"

    dga_score = Float()
    perplexity = Float()
    entropy = Float()
    securerank2 = Float()
    pagerank = Float()
    asn_score = Float()
    prefix_score = Float()
    rip_score = Float()
    fastflux = Boolean()
    popularity = Float()
    geodiversity = Nested(doc_class=GeoDiversityDoc)
    geodiversity_normalized = Nested(doc_class=GeoDiversityDoc)
    tld_geodiversity = Nested(doc_class=GeoDiversityDoc)
    geoscore = Float()
    ks_test = Float()
    attack = String()
    threat_type = String()
    found = Boolean()

    def __init__(self, jdata=None):
        """Copy every item from *jdata* onto the document, mapping None values to {}.

        The default is None rather than {} to avoid the shared
        mutable-default-argument pitfall; behavior with no argument is unchanged.
        """
        GenericDoc.__init__(self)
        if jdata is None:
            jdata = {}
        for k, v in jdata.items():
            if v is None:
                v = {}
            setattr(self, k, v)
class RequirementIndex(DocType):
    """Search index document for jobs.Requirement."""

    django_id = Integer()
    title = String(analyzer='snowball', fields={'raw': String(index='no')})
    country = Integer()
    description = String(analyzer=html_strip)
    is_anonymous = Boolean()
    type_of_employment = String(index='no')
    is_active = Boolean()
    is_deleted = Boolean()
    created_at = Date()

    @staticmethod
    def get_model():
        """Lazy import to avoid circular dependencies at module load time."""
        from jobs.models import Requirement
        return Requirement

    @classmethod
    def to_index(cls, obj):
        """Build an index document from a Requirement instance."""
        return cls(
            django_id=obj.pk,
            title=obj.title,
            country=obj.country.pk,
            description=obj.description,
            is_anonymous=obj.is_anonymous,
            type_of_employment=obj.type_of_employment,
            is_active=obj.is_active,
            is_deleted=obj.is_deleted,
            created_at=obj.created_at,
        )
class IPEnrichment(Enrichment):
    """PassiveTotal enrichment data for an IP address."""

    class Meta:
        doc_type = "passivetotal_ip_enrichment"

    network = String()
    autonomousSystemName = String()
    autonomousSystemNumber = Integer()
    country = String()
    sinkhole = Boolean()
    location = GeoPoint()

    def __init__(self, jdata=None):
        """Copy every item from *jdata* onto the document, mapping None values to {}.

        The default is None rather than {} to avoid the shared
        mutable-default-argument pitfall; behavior with no argument is unchanged.
        """
        if jdata is None:
            jdata = {}
        Enrichment.__init__(self, jdata)
        for k, v in jdata.items():
            if v is None:
                v = {}
            setattr(self, k, v)
class ProfileIndex(DocType):
    """Search index document for a b24online Profile."""

    django_id = Integer()
    email = String(analyzer='snowball', fields={'raw': String(index='no')})
    name = String(analyzer='snowball', fields={'raw': String(index='no')})
    is_active = Boolean()

    @staticmethod
    def get_model():
        """Lazy import to avoid circular dependencies at module load time."""
        from b24online.models import Profile
        return Profile

    @classmethod
    def get_queryset(cls):
        """All profiles, with country and user joined in the same query."""
        model = cls.get_model()
        return model.objects.all().select_related('country', 'user')

    @classmethod
    def to_index(cls, obj):
        """Build an index document from a Profile instance."""
        return cls(
            django_id=obj.pk,
            email=obj.user.email,
            name=obj.full_name,
            is_active=obj.user.is_active,
        )
class ResumeIndex(DocType):
    """Search index document for jobs.Resume."""

    django_id = Integer()
    name = String(analyzer='snowball', fields={'raw': String(index='no')})
    country = Integer()
    is_active = Boolean()
    is_deleted = Boolean()
    created_at = Date()

    @staticmethod
    def get_model():
        """Lazy import to avoid circular dependencies at module load time."""
        from jobs.models import Resume
        return Resume

    @classmethod
    def get_queryset(cls):
        """All resumes, with user, profile and profile country joined."""
        model = cls.get_model()
        return model.objects.all().select_related(
            'user', 'user__profile', 'user__profile__country')

    @classmethod
    def to_index(cls, obj):
        """Build an index document from a Resume; profile fields are optional."""
        profile = obj.user.profile
        doc = cls(
            django_id=obj.pk,
            name=getattr(profile, 'full_name', None),
            is_active=obj.is_active,
            is_deleted=obj.is_deleted,
            created_at=obj.created_at,
        )
        if profile:
            doc.country = getattr(profile.country, 'pk', None)
        return doc
class Page(DocType):
    """Each site lives in its own index."""

    created = Date()
    path = String()
    site_id = Integer()
    fields = Object()
    body = String()

    @property
    def site(self):
        """The owning Site row, looked up by primary key."""
        return Site.query.get(self.site_id)

    @property
    def primary_domain(self):
        """The single Domain flagged as primary for this page's site."""
        return Domain.query.filter_by(site=self.site, primary=True).one()

    def set_html(self, body):
        """Store the plain text extracted from an HTML body."""
        self.body = BeautifulSoup(body, 'html.parser').get_text()

    def set_markdown(self, body):
        """Render markdown to HTML, then store its plain text."""
        self.set_html(markdown(body))

    class Meta:
        using = es_client
class Runtime(DocType):
    """A runtime record; all fields stored unanalyzed for exact matching."""

    runtime_id = String(index='not_analyzed')
    name = String(index='not_analyzed')
    description = String(index='not_analyzed')

    class Meta:
        index = 'pratai'
def test_properties_can_iterate_over_all_the_fields():
    """_collect_fields() yields nested properties and multi-fields, not just top-level ones."""
    m = mapping.Mapping('testing')
    m.field('f1', 'string', test_attr='f1', fields={'f2': String(test_attr='f2')})
    m.field('f3', Nested(test_attr='f3', properties={'f4': String(test_attr='f4')}))

    collected = {f.test_attr for f in m.properties._collect_fields()}
    assert collected == {'f1', 'f2', 'f3', 'f4'}
class CommunityProfile(PublicProfile):
    """Public profile extended with community-specific free-text fields."""

    resume = String()
    freetext = String()
    section = String()

    class Meta:
        index = "community_profiles"
class VisnykDocument(DocType):
    """Visnyk document."""

    # All text fields share the Ukrainian HTML-stripping analyzer.
    plain_content = String(analyzer="html_uk_analyzer")
    goods_name = String(analyzer="html_uk_analyzer")
    cust_name = String(analyzer="html_uk_analyzer")

    def get_absolute_url(self):
        """URL of the preview page for this document."""
        url_kwargs = {"doc_id": self._id}
        return reverse("preview", kwargs=url_kwargs)
class TopicDocType(BaseDocType):
    """Topic item document built on the shared serialized base field."""

    # Builder-style chain: start from the common serialized field and add
    # unanalyzed 'types' and 'same_as' sub-fields for exact matching.
    serialized = base_serialized_field()\
        .field('types', String(index='not_analyzed'))\
        .field('same_as', String(index='not_analyzed'))

    class Meta:
        # Index name is prefixed per environment via settings.
        index = settings.ELASTICSEARCH_PREFIX + '-items'
        doc_type = 'topic'
class LogType(DocType):
    """A single application log entry."""

    filename = String(index='analyzed')
    # 'raw' sub-field keeps the unanalyzed level for exact filtering/aggregation.
    level = String(fields={'raw': String(index='not_analyzed')})
    message = String(analyzer='snowball')
    created_at = Date()

    def __unicode__(self):
        # Rendered as "<timestamp> <message>".
        return u'%s %s' % (self.created_at, self.message)
class Bids(DocType): item = String(index='not_analyzed') #create relationship bidder = String(index='not_analyzed') bid_amount = Integer() class Meta: index = 'bidding' doc_type = 'bid'
class Repository(DocType):
    """A git repository record."""

    created_at = Date()
    description = String(analyzer='snowball')
    # Unanalyzed so tags match exactly and aggregate cleanly.
    tags = String(index='not_analyzed')

    class Meta:
        index = 'git'
        doc_type = 'repos'
class BlogPost(DocType):
    """A blog post with embedded author objects."""

    # Nested (not Object) so each author's name/email pair is queried together.
    authors = Nested(required=True, doc_class=Author, properties={
        'name': String(required=True),
        'email': String(required=True)
    })
    created = Date()
class RapidNHEventType(DocType):
    """Event-type document for the RAPID index."""

    class Meta:
        # Index/doc_type come from Django settings; 'strict' rejects unmapped fields.
        index = settings.ES_INDICES['rapid']['name']
        doc_type = settings.ES_INDICES['rapid']['documents'][0]['name']
        dynamic = MetaField('strict')

    # '_exact' sub-fields keep untokenized Keyword copies for exact matching.
    display_name = String(fields={'_exact': Keyword()})
    name = String(fields={'_exact': Keyword()})
class UpdateResults(DocType):
    """Result of applying one update (KB) to one host."""

    hostname = String(index="not_analyzed")
    updateTitle = String(index="not_analyzed")
    kb = String(index="not_analyzed")
    installed = Boolean()
    date = Date()

    class Meta:
        index = 'updateresults'
class ResourceRecordDoc(GenericDoc):
    """A DNS resource record as reported by OpenDNS."""

    class Meta:
        doc_type = "opendns_resource_record"

    name = String()
    ttl = Integer()
    class_type = String()
    query_type = String()
    # The record data, stored as an IP value.
    rr = Ip()
class PublicProfile(DocType):
    """Publicly visible profile record."""

    pid = Integer()
    hrpid = String()
    sex = String()
    fullname = String()
    promo = String()

    class Meta:
        index = "public_profiles"
class CloudPlatformDoc(BaseSearchModel):
    """Search document for a Galaxy cloud platform."""

    name = String()
    roles = Long(include_in_all=False)
    # Index-time uses the custom `autocomplete` analyzer; query-time uses 'standard'.
    autocomplete = String(analyzer=autocomplete, search_analyzer='standard')

    class Meta:
        index = 'galaxy_cloud_platforms'
        all = MetaField(enabled=True)
        # Unmapped fields are ignored rather than dynamically mapped.
        dynamic = MetaField(enabled=False)
class HostTracker(GenericDoc):
    """PassiveTotal host-tracker observation (attribute seen over a time window)."""

    class Meta:
        doc_type = "passivetotal_host_tracker"

    lastSeen = Date()
    firstSeen = Date()
    attributeType = String()
    attributeValue = String()
class OSIntEnrichment(GenericDoc):
    """PassiveTotal open-source-intelligence enrichment record."""

    class Meta:
        doc_type = "passivetotal_osint_enrichment"

    source = String()
    sourceUrl = String()
    inReport = String()
    tags = String()
class MalwareEnrichment(GenericDoc):
    """PassiveTotal malware-sample enrichment record."""

    class Meta:
        doc_type = "passivetotal_malware_enrichment"

    source = String()
    sourceUrl = String()
    sample = String()
    collectionDate = Date()