class RedditLinkMediaOEmbed(InnerDoc):
    author_name = Keyword()
    author_url = Keyword()
    cache_age = Long(doc_values=False, index=False)
    description = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    height = Short(doc_values=False, index=False)
    html = Keyword(doc_values=False, index=False)
    html5 = Keyword(doc_values=False, index=False)
    mean_alpha = Float(doc_values=False, index=False)
    provider_name = Keyword()
    provider_url = Keyword()
    thumbnail_height = Short(doc_values=False, index=False)
    thumbnail_url = Keyword(doc_values=False, index=False)
    thumbnail_size = Short(doc_values=False, index=False)
    thumbnail_width = Short(doc_values=False, index=False)
    title = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    type = Keyword()
    version = Keyword()
    url = Keyword()
    width = Short(doc_values=False, index=False)
class MentorProject(Document):
    name = Keyword(required=True)
    company = Text()
    bio = Text()
    backgroundRural = Boolean(required=True)
    preferStudentUnderRep = Short(required=True)  # (0-2)
    preferToolExistingKnowledge = Boolean(required=True)
    okExtended = Boolean(required=True)
    okTimezoneDifference = Boolean(required=True)
    timezone = Integer(required=True)  # +- UTC
    id = Keyword(required=True)
    proj_description = Text(required=True)
    proj_tags = Keyword(multi=True)
    numStudentsSelected = Short()
    listStudentsSelected = Nested(StudentVote)
    track = Keyword(required=True)

    class Index:
        name = "mentors_index"
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def add_vote(self, student_id, choice):
        self.listStudentsSelected.append(
            StudentVote(student_id=student_id, choice=choice))

    def save(self, **kwargs):
        # Keep the counter in sync with the nested votes. (The original reset
        # it to 0 on every save, which discarded votes added via add_vote().)
        self.numStudentsSelected = len(self.listStudentsSelected)
        return super().save(**kwargs)
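
# Usage sketch (not from the original source): creating and voting on a
# project, assuming a configured elasticsearch-dsl default connection and the
# StudentVote InnerDoc defined elsewhere in this module. All values invented.
def _example_mentor_project():
    project = MentorProject(
        name="Search Tooling",
        backgroundRural=False,
        preferStudentUnderRep=1,
        preferToolExistingKnowledge=True,
        okExtended=True,
        okTimezoneDifference=False,
        timezone=-5,
        id="mentor-0001",
        proj_description="Build a search UI for the mentorship portal.",
        track="web",
    )
    project.add_vote(student_id="student-42", choice=1)
    return project.save()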
class RedditLinkMediaEmbed(InnerDoc):
    content = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    height = Short(doc_values=False, index=False)
    media_domain_url = Keyword(doc_values=False, index=False)
    scrolling = Boolean()
    width = Short(doc_values=False, index=False)
class RedditMediaMetadata(InnerDoc):
    dashUrl = Keyword(doc_values=False, index=False)  # noqa: N815
    e = Keyword()
    hlsUrl = Keyword(doc_values=False, index=False)  # noqa: N815
    id = Keyword(doc_values=False)
    isGif = Boolean()  # noqa: N815
    m = Keyword()
    s = Object(RedditMediaMetadataS)
    status = Keyword()
    t = Keyword()
    x = Short(doc_values=False, index=False)
    y = Short(doc_values=False, index=False)
class RedditLinkMedia(InnerDoc):
    content = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    event_id = Keyword()
    height = Short(doc_values=False, index=False)
    oembed = Object(RedditLinkMediaOEmbed)
    reddit_video = Object(RedditLinkMediaRedditVideo)
    type = Keyword()
    width = Short(doc_values=False, index=False)
class ReviewElastic(Document):
    business_id = Keyword()
    cool = Short()
    date = Date(default_timezone='UTC', format='date_hour_minute_second')
    funny = Short()
    review_id = Keyword()
    stars = Short()
    text = Text()
    useful = Short()
    user_id = Keyword()

    class Index:
        name = INDEX_NAME
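
# Usage sketch (not from the original source): querying for a business's most
# recent five-star reviews. The business id is invented; assumes a configured
# default connection.
def _example_recent_top_reviews(business_id="b-123", size=10):
    return (
        ReviewElastic.search()
        .filter("term", business_id=business_id)
        .filter("term", stars=5)
        .sort("-date")[:size]
        .execute()
    )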
class Position(DocType):
    # Security id
    securityId = Keyword()
    # Quantity held
    amount = Long()
    # Tradable quantity
    availableAmount = Long()
    # Profit and loss
    profit = Float()
    # Market value
    value = Float()
    # Cost price
    cost = Float()
    # Trading type (0 means T+0, 1 means T+1)
    tradingT = Short()

    def __init__(self, meta=None, security_id=None, trading_t=1, **kwargs):
        super().__init__(meta, **kwargs)
        self.securityId = security_id
        self.availableAmount = 0
        self.amount = 0
        self.profit = 0
        self.value = 0
        self.cost = 0
        self.tradingT = trading_t
class Position(DocType):
    # Security id
    securityId = Keyword()
    # Long position size
    longAmount = Long()
    # Closable long size
    availableLong = Long()
    # Average long price
    averageLongPrice = Long()
    # Short position size
    shortAmount = Long()
    # Closable short size
    availableShort = Long()
    # Average short price
    averageShortPrice = Long()
    # Profit and loss (not declared in the original mapping, but __init__
    # assigns it; declared here so the assignment has a field to map to)
    profit = Float()
    # Market value, or margin in use (for simplicity, always 100%)
    value = Float()
    # Trading type (0 means T+0, 1 means T+1)
    tradingT = Short()

    def __init__(self, meta=None, security_id=None, trading_t=1, **kwargs):
        super().__init__(meta, **kwargs)
        self.securityId = security_id
        self.longAmount = 0
        self.availableLong = 0
        self.shortAmount = 0
        self.availableShort = 0
        self.profit = 0
        self.value = 0
        self.tradingT = trading_t
class TwitterEntitiesMedia(InnerDoc):
    id = Long(doc_values=False, index=False)
    id_str = Keyword(doc_values=False, index=False)
    indices = Short(doc_values=False, index=False, multi=True)
    media_url = Keyword(doc_values=False, index=False)
    media_url_https = Keyword(doc_values=False, index=False)
    url = Keyword(doc_values=False, index=False)
    display_url = Keyword(doc_values=False, index=False)
    expanded_url = Keyword(doc_values=False, index=False)
    type = Keyword()
    original_info = Object(TwitterEntitiesMediaOriginalInfo)
    sizes = Object(TwitterEntitiesMediaSizes)
    source_status_id = Long(doc_values=False, index=False)
    source_status_id_str = Keyword()
    source_user_id = Long(doc_values=False, index=False)
    source_user_id_str = Keyword()
    video_info = Object(TwitterEntitiesMediaVideoInfo)
    features = Object(TwitterEntitiesMediaFeatures)  # {}?
    media_key = Keyword(doc_values=False, index=False)
    ext_media_availability = Object(TwitterEntitiesMediaExtMediaAvailability)
    ext_alt_text = Keyword(doc_values=False, index=False)
    ext_media_color = Object(TwitterExtensionsMediaColor)
    ext = Object(TwitterExtensions)
    additional_media_info = Object(TwitterEntitiesAdditionalMediaInfo)
class BusinessElastic(Document):
    business_id = Keyword()
    name = Text()
    address = Text()
    city = Text(fields={'raw': Keyword()})
    state = Text(fields={'raw': Keyword()})
    postal_code = Text()
    location = GeoPoint()
    stars = Short()
    review_count = Short()
    is_open = Boolean()
    categories = Keyword()
    Ambience = Text()
    BusinessParking = Text()
    GoodForMeal = Text()

    class Index:
        name = INDEX_NAME
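
# Usage sketch (not from the original source): a geo_distance filter against
# the GeoPoint mapping above. Coordinates and radius are invented.
def _example_open_businesses_near(lat=36.17, lon=-115.14, radius="5km"):
    return (
        BusinessElastic.search()
        .filter("geo_distance", distance=radius,
                location={"lat": lat, "lon": lon})
        .filter("term", is_open=True)
        .execute()
    )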
class RedditLinkMediaRedditVideo(InnerDoc):
    dash_url = Keyword(doc_values=False, index=False)
    duration = Integer()
    fallback_url = Keyword(doc_values=False, index=False)
    height = Short(doc_values=False, index=False)
    hls_url = Keyword(doc_values=False, index=False)
    is_gif = Boolean()
    scrubber_media_url = Keyword(doc_values=False, index=False)
    transcoding_status = Keyword()
    # The original mapped width as Boolean(), almost certainly a copy-paste
    # error; mapped like height instead.
    width = Short(doc_values=False, index=False)
class PostType(Document):
    published_at = Text()
    title = Text()
    body = Text()
    excerpt = Text()
    slug = Text()
    minutes_to_read = Short()

    class Index:
        name = ES_INDEX_NAME
class RedditAwarding(InnerDoc):
    award_type = Keyword()
    coin_price = Integer()
    coin_reward = Integer()
    count = Integer()
    days_of_drip_extension = Integer()
    days_of_premium = Integer()
    description = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    end_date = RedditDate()
    icon_height = Short(doc_values=False, index=False)
    icon_url = Keyword(doc_values=False, index=False)
    icon_width = Short(doc_values=False, index=False)
    id = Keyword()
    is_enabled = Boolean()
    name = Keyword()
    resized_icons = Nested(RedditAwardingResizedIcon)
    start_date = RedditDate()
    subreddit_coin_reward = Integer()
    subreddit_id = Keyword()
class Order(DocType):
    # Order id
    id = Keyword()
    # Trader id
    botName = Keyword()
    # Security id
    securityId = Keyword()
    # Buy/sell (long/short)
    direction = Short()
    # Market/limit
    type = Keyword()
    # Price
    price = Float()
    # Quantity
    amount = Long()
    # Status
    status = Keyword()
    # Time
    timestamp = Date()

    class Meta:
        doc_type = 'doc'
        all = MetaField(enabled=False)
class TwitterEntitiesIndicesText(InnerDoc):
    indices = Short(doc_values=False, index=False, multi=True)
    text = Keyword(doc_values=False, index=False)
class TwitterExtensionsMediaColorPaletteRgb(InnerDoc):
    red = Short(doc_values=False, index=False)
    green = Short(doc_values=False, index=False)
    blue = Short(doc_values=False, index=False)
class TwitterEntitiesUrl(InnerDoc):
    url = Keyword()
    expanded_url = Keyword()
    display_url = Keyword()
    indices = Short(multi=True)
class TwitterEntitiesUserMention(InnerDoc):
    id = Long(doc_values=False, index=False)
    id_str = Keyword()
    indices = Short(doc_values=False, index=False, multi=True)
    name = Keyword()
    screen_name = Keyword()
class RedditLink(RedditBaseDocument):
    domain = Keyword()
    url = Keyword()
    title = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    selftext = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    selftext_html = Keyword(doc_values=False, index=False)

    link_flair_background_color = Keyword(doc_values=False, index=False)
    link_flair_css_class = Keyword()
    link_flair_richtext = Nested(RedditFlairRichtext)
    link_flair_template_id = Keyword()
    link_flair_text = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    link_flair_text_color = Keyword(doc_values=False, index=False)
    link_flair_type = Keyword()

    media = Object(RedditLinkMedia)
    media_embed = Object(RedditLinkMediaEmbed)
    secure_media = Object(RedditLinkMedia)
    secure_media_embed = Object(RedditLinkMediaEmbed)
    preview = Object(RedditLinkPreview)
    thumbnail = Keyword(doc_values=False, index=False)
    thumbnail_width = Short(doc_values=False, index=False)
    thumbnail_height = Short(doc_values=False, index=False)

    collections = Nested(RedditLinkCollection)
    crosspost_parent = Keyword()
    # See documentation on this in RedditPost.from_dict().
    # crosspost_parent_list = Nested(RedditLink)  # noqa: E800

    allow_live_comments = Boolean()
    brand_safe = Boolean()
    contest_mode = Boolean()
    disable_comments = Boolean()
    hide_score = Boolean()
    is_blank = Boolean()  # For sample, always False if it exists.
    is_crosspostable = Boolean()
    is_meta = Boolean()  # For sample, always None if it exists.
    is_original_content = Boolean()
    is_reddit_media_domain = Boolean()
    is_robot_indexable = Boolean()
    is_self = Boolean()
    is_video = Boolean()
    media_only = Boolean()
    over_18 = Boolean()
    pinned = Boolean()
    quarantine = Boolean()
    spoiler = Boolean()

    category = Keyword()
    content_categories = Keyword(multi=True)
    discussion_type = Keyword()  # For sample, always None if it exists.
    post_categories = Keyword(
        multi=True)  # For sample, always None if it exists.
    post_hint = Keyword()
    suggested_sort = Keyword()

    previous_visits = RedditDate(multi=True)
    view_count = Integer()  # For sample, always None if it exists.

    whitelist_status = Keyword()
    wls = Short()
    parent_whitelist_status = Keyword()
    pwls = Short()

    num_comments = Integer()
    num_crossposts = Integer()

    event_is_live = Boolean()
    event_start = RedditDate()
    event_end = RedditDate()

    # Promotion-related.
    call_to_action = Keyword()
    domain_override = Keyword()
    embed_type = Keyword()
    embed_url = Keyword()
    href_url = Keyword()
    mobile_ad_url = Keyword(doc_values=False, index=False)
    outbound_link = Object(RedditLinkOutboundLink)
    promoted = Boolean()
    promoted_by = Long()
    show_media = Boolean()
    third_party_trackers = Keyword(multi=True, doc_values=False, index=False)
    third_party_tracking = Keyword(doc_values=False, index=False)
    third_party_tracking_2 = Keyword(doc_values=False, index=False)

    # Log-in required.
    hidden = Boolean()
    clicked = Boolean()
    visited = Boolean()

    # Moderator required.
    ignore_reports = Boolean()  # For sample, always False if it exists.
    removed = Boolean()  # For sample, always False if it exists.
    spam = Boolean()  # For sample, always False if it exists.

    # No idea what these are.
    # There is a "from" field in the Reddit JSON sometimes. However, from is a
    # keyword in Python and therefore can't be used as an attribute name. I
    # opened an issue on this:
    # https://github.com/elastic/elasticsearch-dsl-py/issues/1345
    # from = Keyword()  # noqa: E800  # For sample, always None if it exists.
    from_id = Keyword()  # For sample, always None if it exists.
    from_kind = Keyword()  # For sample, always None if it exists.

    @classmethod
    @overrides
    def prepare_doc_dict(cls, doc_dict: MutableMapping[str, object]) -> None:
        super().prepare_doc_dict(doc_dict)
        doc_dict["_id"] = "t1_" + checked_cast(str, doc_dict["id"])
        # "crosspost_parent_list" contains the whole JSON dict of the post this
        # post is cross-posting somewhere. For simplicity of the data model we
        # discard this here, at the cost of a single ID-lookup to the index
        # should it be needed later.
        doc_dict.pop("crosspost_parent_list", None)
class TwitterEntitiesMediaVideoInfo(InnerDoc):
    aspect_ratio = Short(multi=True)
    duration_millis = Integer()
    variants = Nested(TwitterEntitiesMediaVideoInfoVariant)
class Politicians(Document):
    source = Nested(Source)
    unidade_eleitoral = Nested(UnidadeEleitoral)
    foto_url = Text()

    # CSV fields
    ano_eleicao = Integer(required=True)
    cd_cargo = Text(fields={'keyword': Keyword()})
    cd_cor_raca = Text(fields={'keyword': Keyword()})
    cd_estado_civil = Text(fields={'keyword': Keyword()})
    codigo_legenda = Text(fields={'keyword': Keyword()})
    cd_municipio_nascimento = Text(fields={'keyword': Keyword()})
    cd_nacionalidade = Text(fields={'keyword': Keyword()})
    cd_ocupacao = Text(fields={'keyword': Keyword()})
    cd_genero = Text(fields={'keyword': Keyword()})
    cd_grau_instrucao = Text(fields={'keyword': Keyword()})
    cd_situacao_candidatura = Text(fields={'keyword': Keyword()})
    cd_sit_tot_turno = Text(fields={'keyword': Keyword()})
    composicao_legenda = Text(fields={'keyword': Keyword()})
    nr_cpf_candidato = Text(fields={'keyword': Keyword()})
    dt_geracao = Date()
    dt_nascimento = Text(fields={'keyword': Keyword()})
    ds_cargo = Text(fields={'keyword': Keyword()})
    ds_cor_raca = Text(fields={'keyword': Keyword()})
    ds_eleicao = Text(fields={'keyword': Keyword()})
    ds_estado_civil = Text(fields={'keyword': Keyword()})
    ds_grau_instrucao = Text(fields={'keyword': Keyword()})
    ds_nacionalidade = Text(fields={'keyword': Keyword()})
    ds_ocupacao = Text(fields={'keyword': Keyword()})
    ds_genero = Text(fields={'keyword': Keyword()})
    nm_ue = Text(fields={'keyword': Keyword()})
    ds_sit_tot_turno = Text(fields={'keyword': Keyword()})
    nr_despesa_max_campanha = Text(fields={'keyword': Keyword()})
    ds_situacao_candidatura = Text(fields={'keyword': Keyword()})
    hr_geracao = Text(fields={'keyword': Keyword()})
    idade_data_eleicao = Text(fields={'keyword': Keyword()})
    nm_email = Text(fields={'keyword': Keyword()})
    nm_candidato = Text(fields={'keyword': Keyword()})
    nome_legenda = Text(fields={'keyword': Keyword()})
    nm_municipio_nascimento = Text(fields={'keyword': Keyword()})
    nm_partido = Text(fields={'keyword': Keyword()})
    nm_urna_candidato = Text(fields={'keyword': Keyword()})
    nr_candidato = Text(fields={'keyword': Keyword()})
    nr_partido = Text(fields={'keyword': Keyword()})
    nr_titulo_eleitoral_candidato = Text(fields={'keyword': Keyword()})
    nr_turno = Text(fields={'keyword': Keyword()})
    sq_candidato = Text(fields={'keyword': Keyword()})
    sigla_legenda = Text(fields={'keyword': Keyword()})
    sg_partido = Text(fields={'keyword': Keyword()})
    sg_ue = Text(fields={'keyword': Keyword()})
    sg_uf = Text(fields={'keyword': Keyword()})
    sg_uf_nascimento = Text(fields={'keyword': Keyword()})

    # 2018
    cd_detalhe_situacao_cand = Integer()
    cd_eleicao = Integer()
    cd_tipo_eleicao = Short()
    ds_detalhe_situacao_cand = Text(fields={'keyword': Keyword()})
    dt_eleicao = Text(fields={'keyword': Keyword()})
    nm_social_candidato = Text(fields={'keyword': Keyword()})
    nm_tipo_eleicao = Text(fields={'keyword': Keyword()})
    nr_idade_data_posse = Short()
    nr_processo = Text(fields={'keyword': Keyword()})
    nr_protocolo_candidatura = Text(fields={'keyword': Keyword()})
    st_declarar_bens = Text(fields={'keyword': Keyword()})
    st_reeleicao = Text(fields={'keyword': Keyword()})
    tp_abrangencia = Text(fields={'keyword': Keyword()})
    tp_agremiacao = Text(fields={'keyword': Keyword()})

    @classmethod
    def set_index_name(cls, year):
        return f'{INDEX_NAME}-{year}'

    def save(self, **kwargs):
        kwargs['index'] = Politicians.set_index_name(self.ano_eleicao)
        return super(Politicians, self).save(**kwargs)

    @classmethod
    def bulk_save(cls, dicts):
        objects = (
            dict(d.to_dict(include_meta=True),
                 **{'_index': cls.set_index_name(int(d.ano_eleicao))})
            for d in dicts
        )
        client = connections.get_connection()
        return bulk(client, objects)

    @classmethod
    def bulk_update(cls, dicts, client=None):
        def upsert(doc):
            d = doc.to_dict(True)
            d['_op_type'] = 'update'
            d['doc'] = d['_source']
            d['doc_as_upsert'] = True
            del d['_source']
            return d

        client = client or connections.get_connection()
        return bulk(client, (upsert(d) for d in dicts))
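
# Usage sketch (not from the original source): bulk indexing with the helper
# above. Assumes the documents already carry ano_eleicao and a default
# connection is configured; bulk() returns a (success_count, errors) tuple.
def _example_politicians_bulk(docs):
    # docs: an iterable of Politicians documents
    success, errors = Politicians.bulk_save(docs)
    return success, errors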
class TwitterEntitiesMediaOriginalInfo(InnerDoc):
    height = Short(doc_values=False, index=False)
    width = Short(doc_values=False, index=False)
    focus_rects = Nested(TwitterEntitiesMediaRect)
class TwitterEntitiesMediaRect(InnerDoc):
    x = Short(doc_values=False, index=False)
    y = Short(doc_values=False, index=False)
    h = Short(doc_values=False, index=False)
    w = Short(doc_values=False, index=False)
class TestEntry(DocType):
    timestamp = Date()
    symbol = Text(analyzer='standard', fields={'raw': Keyword()})
    AverageDailyVolume = Long()
    BookValue = Double()
    Change_PercentChange = Text(analyzer='standard')
    Change = Double()
    Currency = Text(analyzer='standard', fields={'raw': Keyword()})
    DividendShare = Double()
    LastTradeDate = Date()
    EarningsShare = Double()
    EPSEstimateCurrentYear = Double()
    EPSEstimateNextYear = Double()
    EPSEstimateNextQuarter = Double()
    DaysLow = Double()
    DaysHigh = Double()
    YearLow = Double()
    YearHigh = Double()
    MarketCapitalization = Text(analyzer='standard')
    EBITDA = Text(analyzer='standard')
    ChangeFromYearLow = Double()
    # (sic) misspelled field name kept as-is; renaming it would change the
    # index mapping and break existing data.
    PercebtChangeFromYearLow = Text(analyzer='standard')
    LastTradeWithTime = Text(analyzer='standard')
    LastTradePriceOnly = Double()
    DaysRange = Text(analyzer='standard')
    FiftydayMovingAverage = Double()
    TwoHundreddayMovingAverage = Double()
    ChangeFromTwoHundreddayMovingAverage = Double()
    PercentChangeFromTwoHundreddayMovingAverage = Text(analyzer='standard')
    ChangeFromFiftydayMovingAverage = Double()
    PercentChangeFromFiftydayMovingAverage = Text(analyzer='standard')
    Name = Text(analyzer='standard', fields={'raw': Keyword()})
    Open = Double()
    PreviousClose = Double()
    ChangeinPercent = Text(analyzer='standard')
    PriceSales = Double()
    PriceBook = Double()
    ExDividendDate = Date()
    PERatio = Double()
    DividendPayDate = Date()
    PEGRatio = Double()
    PriceEPSEstimateCurrentYear = Double()
    PriceEPSEstimateNextYear = Double()
    ShortRatio = Double()
    LastTradeTime = Date()
    OneyrTargetPrice = Double()
    Volume = Long()
    YearRange = Text(analyzer='standard')
    StockExchange = Text(analyzer='standard', fields={'raw': Keyword()})
    DividendYield = Double()
    PercentChange = Text(analyzer='standard')
    RiskModelScoreInit = Short()
    RiskModelScoreCurrent = Short()
    PEGScoreInit = Short()
    PEGScoreCurrent = Short()
    DivScoreInit = Short()
    DivScoreCurrent = Short()
    ShortScoreInit = Short()
    ShortScoreCurrent = Short()
    PriceScoreInit = Short()
    # Was "PriceStoreCurrent" in the original, but save() below reads
    # PriceScoreCurrent, which would have raised an AttributeError.
    PriceScoreCurrent = Short()
    OverseasScoreInit = Short()
    OverseasScoreCurrent = Short()
    DivYieldInit = Double()
    NumOfSharesInit = Double()
    NumOfSharesCurrent = Double()
    PriceInit = Double()
    TotalValueInit = Double()
    TotalValueCurrent = Double()
    PercentOfTotalPortfolioValue = Double()
    PercentGainSinceInception = Double()
    TotalPortfolioValueInit = Double()
    TotalPortfolioValueCurrent = Double()
    TotalPortfolioGainSinceInception = Double()
    stored_at = Date()

    class Meta:
        index = 'portfolio'

    def save(self, **kwargs):
        self.RiskModelScoreCurrent = (
            self.PEGScoreCurrent + self.DivScoreCurrent
            + self.ShortScoreCurrent + self.PriceScoreCurrent
            + self.OverseasScoreCurrent)
        self.TotalValueCurrent = (
            self.NumOfSharesCurrent * self.LastTradePriceOnly)
        self.PercentOfTotalPortfolioValue = (
            self.TotalValueCurrent / self.TotalPortfolioValueCurrent)
        self.PercentGainSinceInception = (
            (self.TotalValueCurrent - self.TotalValueInit)
            / self.TotalValueInit)
        self.TotalPortfolioGainSinceInception = (
            (self.TotalPortfolioValueCurrent - self.TotalPortfolioValueInit)
            / self.TotalPortfolioValueInit)
        self.stored_at = datetime.datetime.now()
        return super(TestEntry, self).save(**kwargs)
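
# Usage sketch (not from the original source): save() derives the portfolio
# aggregates, so callers only set the raw inputs. All values are invented;
# assumes a configured connection for the 'portfolio' index.
def _example_test_entry():
    entry = TestEntry(
        symbol="ACME",
        LastTradePriceOnly=10.0,
        NumOfSharesCurrent=100.0,
        TotalValueInit=900.0,
        TotalPortfolioValueInit=10000.0,
        TotalPortfolioValueCurrent=11000.0,
        PEGScoreCurrent=1, DivScoreCurrent=2, ShortScoreCurrent=3,
        PriceScoreCurrent=4, OverseasScoreCurrent=5,
    )
    # RiskModelScoreCurrent, TotalValueCurrent, the percentage fields, and
    # stored_at are computed inside save() before indexing.
    return entry.save()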
class Recipe(Document):
    """Python representation of a Recipe document in Elasticsearch.

    Args:
        name: A string, the recipe title.
        ingredients: A list of strings, the ingredients of the recipe.
        url: A string, the URL from where the recipe was sourced.
        source: A string, the original publisher of the recipe.
        calories: An int, the calorie count of the recipe. randint(0, 1400)
        carbohydrate: An int, the carb count of the recipe. randint(0, 75)
        fat: An int, the fat count of the recipe. randint(0, 100)
        protein: An int, the protein count of the recipe. randint(0, 50)
        image: An optional string, the URL for an image of the recipe.
        cookTime: An optional string, the cook time.
        recipeYield: An optional string, the recipe yield.
        datePublished: An optional string, the original publish date.
        prepTime: An optional string, the prep time.
        description: An optional string, the recipe pretext/description.
        totalTime: An optional string, the total cook/prep time.
        creator: An optional string, the original author of the recipe.
        recipeCategory: An optional string, the type of recipe.
        recipeInstructions: An optional string, the recipe instructions.
        tags: An optional string array containing any of
            ["vegetarian", "vegan", "gluten-free"].
    """

    # These fields should be identical to those in
    # recipe-db/loading-scripts/recipe-mapping.json
    name = Text(fields={"keyword": Keyword()})
    ingredients = Text(fields={"keyword": Keyword()})
    url = Text(fields={"keyword": Keyword()})
    source = Text(fields={"keyword": Keyword()})
    calories = Short()
    carbohydrate = Short()
    fat = Short()
    protein = Short()
    image = Text(fields={"keyword": Keyword()})
    cookTime = Text(fields={"keyword": Keyword()})
    recipeYield = Text(fields={"keyword": Keyword()})
    datePublished = Text(fields={"keyword": Keyword()})
    prepTime = Text(fields={"keyword": Keyword()})
    description = Text(fields={"keyword": Keyword()})
    totalTime = Text(fields={"keyword": Keyword()})
    creator = Text(fields={"keyword": Keyword()})
    recipeCategory = Text(fields={"keyword": Keyword()})
    recipeInstructions = Text(fields={"keyword": Keyword()})
    tags = Text(fields={"keyword": Keyword()})

    # The Index inner class is where we define connection config
    class Index:
        name = "recipes"

    @classmethod
    def _get_using(cls, using=None):
        """Override base method for specifying our current Elasticsearch
        connection."""
        return current_app.elasticsearch

    def get_image_url(self, use_google=False):
        """Return a URL for an image of this recipe.

        It will try to return the OpenRecipes scraped image if it exists, else
        it will do a Google image search, else it will return a default
        placeholder image.

        Args:
            use_google: If true, will make an API call to Google images for
                missing images, else it will skip this step (for API quota
                purposes).

        Returns:
            A string URL which can be GET requested to obtain an image.
        """
        # First try the OpenRecipes image
        try:
            response = requests.head(self.image, allow_redirects=True)
            if response.status_code == 200:
                return self.image
        except Exception:  # e.g. timeout
            pass

        # Then try the first Google Image search result
        if use_google:
            try:
                google_image_search = GoogleImagesSearch(None, None)
                google_image_search.search(
                    search_params={
                        "q": self.name,
                        "num": 1,
                    }
                )
                return google_image_search.results()[0].url
            except Exception:  # e.g. API quota limit reached
                pass

        # Else return our default image
        return url_for("static", filename="images/default_recipe_image.jpg")

    # THESE ARE SAMPLE METHODS FOR YOU TO GET DATA FROM
    @classmethod
    def get_single_recipe(cls):
        """Return a single Recipe object from Elasticsearch.

        Returns:
            A Recipe object.
        """
        return cls.search().execute()[0]

    @classmethod
    def get_multi_recipe_paged(cls, page=0, per_page=10):
        """Return a list of Recipes, considering pagination.

        Usage:
            >>> # Default options just gets you the first 10 recipes
            >>> recipes_0 = Recipe.get_multi_recipe_paged()  # page=0
            >>> # Get next set of results by specifying the page
            >>> recipes_1 = Recipe.get_multi_recipe_paged(page=1)
            >>> # Get more results by changing page size
            >>> recipes_0_4 = Recipe.get_multi_recipe_paged(per_page=50)

        Args:
            page: The page of results to get.
            per_page: The size of each page of results to get.

        Returns:
            A list of Recipe objects.
        """
        return list(
            cls.search()[page * per_page : (page + 1) * per_page].execute())

    # TODO: CUSTOM SEARCH METHODS
    @classmethod
    def get_recipe_by_id(cls, recipe_id):
        """Return a single Recipe object from Elasticsearch by its ID.

        Args:
            recipe_id: The ID of the recipe to get.

        Returns:
            The Recipe object corresponding to the given ID, or None if not
            found.
        """
        try:
            return cls.get(recipe_id)
        except Exception:
            return None

    @classmethod
    def get_recipes_by_criteria(cls, page=0, per_page=10, **criteria):
        """Advanced search wrapper for Recipes.

        An example set of criteria is as follows:

            criteria = {
                "query": "dip",
                "ingredients": "olive oil, garlic",
                "tags": ["gluten-free", "vegetarian"],
                "minCalories": 0,
                "maxCalories": 100,
                "minCarbs": 0,
                "maxCarbs": 100,
                "minProteins": 0,
                "maxProteins": 100,
                "minFats": 0,
                "maxFats": 100,
            }

        Note that all of the items are optional and will be ignored if omitted
        or if falsy values are provided (e.g. False, None, [], {}, "").

        Usage::
            >>> # e.g. direct kwargs
            >>> Recipe.get_recipes_by_criteria(query="dip", tags=["vegetarian"]).execute()
            >>> # e.g. splat kwargs
            >>> criteria = {"query": "dip", "tags": ["vegetarian"]}
            >>> Recipe.get_recipes_by_criteria(**criteria).execute()

        Args:
            page: The page of results to get.
            per_page: The size of each page of results to get.
            criteria: kwargs of the below:
                query: The recipe name to (partly) match
                ingredients: List of ingredients the recipe should contain (any)
                tags: List of tags the recipe should match
                calories: Integer tuple range
                carbohydrate: Integer tuple range
                fat: Integer tuple range
                protein: Integer tuple range

        Returns:
            An elasticsearch_dsl.Search object, which you can get a list of
            recipes out of by doing list(search_object.execute()).
        """
        search = cls.search()[page * per_page : (page + 1) * per_page]
        if criteria.get("query"):
            search = search.query(
                Q("fuzzy", name=criteria.get("query"))
                | Q("match", name=criteria.get("query"))
            )
        if criteria.get("ingredients"):
            ingredients = criteria.get("ingredients")
            if isinstance(ingredients, str):
                ingredients = [i.strip() for i in ingredients.split(",")]
            search = search.query("terms", ingredients=ingredients)
        if criteria.get("tags"):
            search = search.filter(
                "terms_set",
                tags__keyword={
                    "terms": criteria.get("tags"),
                    "minimum_should_match_script": {"source": "params.num_terms"},
                },
            )
        # Each macro range filters its own field. (The original filtered all
        # of them on `calories`, which was a copy-paste bug.)
        if criteria.get("minCalories"):
            search = search.filter(
                "range", calories={"gte": criteria.get("minCalories")}
            )
        if criteria.get("maxCalories"):
            search = search.filter(
                "range", calories={"lte": criteria.get("maxCalories")}
            )
        if criteria.get("minCarbs"):
            search = search.filter(
                "range", carbohydrate={"gte": criteria.get("minCarbs")}
            )
        if criteria.get("maxCarbs"):
            search = search.filter(
                "range", carbohydrate={"lte": criteria.get("maxCarbs")}
            )
        if criteria.get("minProteins"):
            search = search.filter(
                "range", protein={"gte": criteria.get("minProteins")}
            )
        if criteria.get("maxProteins"):
            search = search.filter(
                "range", protein={"lte": criteria.get("maxProteins")}
            )
        if criteria.get("minFats"):
            search = search.filter("range", fat={"gte": criteria.get("minFats")})
        if criteria.get("maxFats"):
            search = search.filter("range", fat={"lte": criteria.get("maxFats")})
        return search

    @classmethod
    def get_recipe_suggestions(cls, prefix):
        search = cls.search()
        search = search.query(
            Q("match_phrase_prefix", name=prefix) | Q("prefix", name=prefix)
        )
        return search
class DocTestSSLResult(Document):
    source = Text(fields={'raw': Keyword()})
    result = Boolean()
    timestamp = Date()
    ip = Keyword()
    hostname = Keyword()
    port = Integer()
    svcid = Keyword()
    protocols = Keyword(multi=True)
    ciphers = Text(multi=True, fields={'raw': Keyword()})
    ciphertests = Keyword(multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": Keyword(),
            "cipher": Text(fields={'raw': Keyword()}),
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": Text(fields={'raw': Keyword()}),
            "md5_fingerprint": Keyword(),
            "sha1_fingerprint": Keyword(),
            "sha256_fingerprint": Keyword(),
            "cn": Text(fields={'raw': Keyword()}),
            "san": Text(multi=True, fields={'raw': Keyword()}),
            "issuer": Text(fields={'raw': Keyword()}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": Text(fields={'raw': Keyword()}),
            "Crl_url": Text(fields={'raw': Keyword()}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = Keyword(multi=True)

    def parseCSVLine(self, line):
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:
            # host, ip and port
            m = reIpHostColumn.search(line['fqdn/ip'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):
            # protocols
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):
            # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif (reCipherTests.search(line['id'])
              and reVulnerable.search(line['finding'])):
            # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))

        if line['id'] == "cipher_order":
            # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['severity']))
        elif line['id'] == "protocol_negotiated":
            # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "cipher_negotiated":
            # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "cert_keySize":
            # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "cert_signatureAlgorithm":
            # certificate signature algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "cert_fingerprintSHA1":
            # certificate fingerprint SHA1
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
        elif line['id'] == "cert_fingerprintSHA256":
            # certificate fingerprint SHA256
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cert_fingerprintMD5":
            # certificate fingerprint MD5
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
        elif line['id'] == "cert_commonName":
            # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "cert_subjectAltName":
            # certificate SAN (needs revision: currently stores the raw match)
            m = reSAN.search(line['finding'])
            if m:
                self.cert.san = m.group(1)
                # TODO: split into individual names, e.g.:
                #   for san in m.group(1).split(" "):
                #       if san != "--":
                #           self.cert.san.append(san)
        elif line['id'] == "cert_caIssuers":
            # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":
            # certificate extended validation (detection not fully verified)
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "cert_notAfter":
            # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(
                    unparsedDate, "%Y-%m-%d %H:%M")
        elif line['id'] == "cert_ocspURL":
            # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"
        elif line['id'] == "cert_crlDistributionPoints":
            # certificate CRL
            m = reAll.search(line['finding'])
            if m:
                self.cert.Crl_url = m.group(1)
            else:
                self.cert.Crl_url = "-"
        elif line['id'] == "OCSP_stapling":
            # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(
                reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "CCS", "secure_renego",
                            "secure_client_renego", "CRIME_TLS", "SWEET32",
                            "POODLE_SSL", "fallback_SCSV", "FREAK", "DROWN",
                            "LOGJAM", "BEAST", "LUCKY13",
                            "RC4") and reVulnerable.search(line['severity']):
            m = reVulnerable.search(line['severity'])
            if str(m.group(1)) != '':
                self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(
                    m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(
            csvfile,
            fieldnames=("id", "fqdn/ip", "port", "severity", "finding",
                        "cve", "cwe"),
            delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            self.result = False

        if 'debug' in kwargs and kwargs['debug']:
            pp.pprint(self.to_dict())
        return super().save()
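
# Usage sketch (not from the original source): parsing a testssl.sh CSV export
# into one document. The filename is invented and only illustrates the
# ip/port/datetime pattern that reDefaultFilename is expected to match.
def _example_parse_testssl(path="203.0.113.5_p443-20240101-1200.csv"):
    doc = DocTestSSLResult(source=path)
    with open(path, newline="") as csvfile:
        doc.parseCSV(csvfile)
    return doc.save(debug=False)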
class DocHTTPRequestResponse(DocType):
    class Meta:
        doc_type = 'HTTPRequestResponse'

    timestamp = Date()
    protocol = Text()
    host = Keyword()
    port = Integer()
    request = Object(
        properties={
            'method': Keyword(),
            'url': Text(fields={'keyword': Keyword()}),
            'requestline': Text(fields={'keyword': Keyword()}),
            'content_type': Text(fields={'keyword': Keyword()}),
            'headernames': Text(analyzer=identifierAnalyzer, multi=True,
                                fields={'keyword': Keyword()}),
            'headers': Nested(
                properties={
                    'name': Text(analyzer=identifierAnalyzer,
                                 fields={'keyword': Keyword()}),
                    'value': Text(fields={'keyword': Keyword()}),
                }),
            'parameternames': Text(analyzer=identifierAnalyzer, multi=True,
                                   fields={'keyword': Keyword()}),
            'parameters': Nested(
                properties={
                    'type': Keyword(),
                    'name': Text(analyzer=identifierAnalyzer,
                                 fields={'keyword': Keyword()}),
                    'value': Text(fields={'keyword': Keyword()}),
                }),
            'body': Text(include_in_all=False),
        })
    response = Object(
        properties={
            'status': Short(),
            'responseline': Text(fields={'keyword': Keyword()}),
            'content_type': Text(fields={'keyword': Keyword()}),
            'inferred_content_type': Text(fields={'keyword': Keyword()}),
            'headernames': Text(analyzer=identifierAnalyzer, multi=True,
                                fields={'keyword': Keyword()}),
            'headers': Nested(
                properties={
                    'name': Text(analyzer=identifierAnalyzer,
                                 fields={'keyword': Keyword()}),
                    'value': Text(fields={'keyword': Keyword()}),
                }),
            'cookienames': Text(analyzer=identifierAnalyzer, multi=True,
                                fields={'keyword': Keyword()}),
            'cookies': Nested(
                properties={
                    'domain': Text(fields={'keyword': Keyword()}),
                    'expiration': Date(fields={'keyword': Keyword()}),
                    'name': Text(analyzer=identifierAnalyzer,
                                 fields={'keyword': Keyword()}),
                    'path': Text(fields={'keyword': Keyword()}),
                    'value': Text(fields={'keyword': Keyword()}),
                }),
            'body': Text(include_in_all=False),
            'doctype': Text(multi=True, fields={'keyword': Keyword()}),
            'base': Text(multi=True, fields={'keyword': Keyword()}),
            'stylesheets': Text(multi=True, fields={'keyword': Keyword()}),
            'frames': Text(multi=True, fields={'keyword': Keyword()}),
            'scripts': Text(multi=True, fields={'keyword': Keyword()}),
            'links': Text(multi=True, fields={'keyword': Keyword()}),
            'images': Text(multi=True, fields={'keyword': Keyword()}),
            'audio': Text(multi=True, fields={'keyword': Keyword()}),
            'video': Text(multi=True, fields={'keyword': Keyword()}),
            'objects': Text(multi=True, fields={'keyword': Keyword()}),
            'formactions': Text(multi=True, fields={'keyword': Keyword()}),
            # all external references
            'extrefs': Text(multi=True, fields={'keyword': Keyword()}),
        })

    def add_request_header(self, header):
        parsed = parse_header(header)
        self.request.headers.append(parsed)
        self.request.headernames.append(parsed['name'])

    def add_response_header(self, header):
        parsed = parse_header(header)
        self.response.headers.append(parsed)
        self.response.headernames.append(parsed['name'])

    def add_parsed_request_header(self, name, value):
        self.request.headers.append({"name": name, "value": value})
        self.request.headernames.append(name)

    def add_parsed_response_header(self, name, value):
        self.response.headers.append({"name": name, "value": value})
        self.response.headernames.append(name)

    def add_request_parameter(self, typename, name, value):
        param = {'type': typename, 'name': name, 'value': value}
        self.request.parameters.append(param)
        self.request.parameternames.append(param['name'])

    def add_response_cookie(self, name, value, domain=None, path=None,
                            expiration=None):
        cookie = {
            'name': name,
            'value': value,
            'domain': domain,
            'path': path,
            'expiration': expiration,
        }
        self.response.cookies.append(cookie)
        self.response.cookienames.append(cookie['name'])

    def save(self, storeResponseBody=True, **kwargs):
        if not self.timestamp:
            # TODO: timestamp options: now (as is), request and response
            self.timestamp = datetime.now(tz)
        # The original condition relied on `and` binding tighter than `or`,
        # which let the bare "html" check fire even when an inferred content
        # type was present; the grouping below matches the apparent intent.
        is_html = self.response.body and (
            (self.response.inferred_content_type
             and self.response.inferred_content_type == "HTML")
            or (not self.response.inferred_content_type
                and ("HTML" in self.response.content_type
                     or "html" in self.response.content_type)))
        if is_html:
            parser = WASEHTMLParser()
            parser.feed(self.response.body)
            parser.close()

            self.response.doctype = list(parser.doctype)
            self.response.base = list(parser.base)
            self.response.stylesheets = list(parser.stylesheets)
            self.response.frames = list(parser.frames)
            self.response.scripts = list(parser.scripts)
            self.response.links = list(parser.links)
            self.response.images = list(parser.images)
            self.response.audio = list(parser.audio)
            self.response.video = list(parser.video)
            self.response.objects = list(parser.objects)
            self.response.formactions = list(parser.formactions)
            self.response.extrefs = list(parser.extrefs)
        if not storeResponseBody:
            self.response.body = None
        return super(DocHTTPRequestResponse, self).save(**kwargs)
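
# Usage sketch (not from the original source): recording one HTTP exchange
# with the helper methods above. All values are invented; assumes a configured
# connection and the WASEHTMLParser used by save().
def _example_record_http_exchange():
    doc = DocHTTPRequestResponse(
        protocol="https", host="example.org", port=443,
        response={
            "status": 200,
            "content_type": "text/html",
            "body": "<html><body><a href='/next'>next</a></body></html>",
        })
    doc.add_parsed_request_header("User-Agent", "wase-example")
    doc.add_request_parameter("url", "q", "demo")
    doc.add_parsed_response_header("Content-Type", "text/html")
    # save() extracts doctype/links/scripts/etc. from HTML bodies; pass
    # storeResponseBody=False to drop the raw body after parsing.
    return doc.save(storeResponseBody=False)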
class TwitterEntitiesMediaSize(InnerDoc):
    h = Short(doc_values=False, index=False)
    w = Short(doc_values=False, index=False)
    resize = Keyword(doc_values=False, index=False)
class CardNameIndex(Document):
    datum = Text(norms=False)
    naam = Text(norms=False)
    inhoud = Text(norms=False)
    bron = Text(norms=False)
    getuigen = Text(norms=False)
    bijzonderheden = Text(norms=False)
    naam_keyword = Keyword()
    jaar = Short()

    class Index:
        name = 'namenindex'

        def __new__(cls):
            # Note: inside this method the name Index resolves at module
            # scope (e.g. elasticsearch_dsl.Index), not to this inner class.
            return Index(name=cls.name)

    @classmethod
    def from_csv_line(cls, line: List[str]) -> 'CardNameIndex':
        doc = cls()
        if len(line[0]) == 0:
            return doc
        doc.meta.id = int(line[0])
        doc.datum = cls.parse_entry(line[1])
        doc.naam = cls.parse_entry(line[2])
        doc.inhoud = cls.parse_entry(line[3])
        doc.bron = cls.parse_entry(line[4])
        doc.getuigen = cls.parse_entry(line[5])
        doc.bijzonderheden = cls.parse_entry(line[6])
        if not doc.is_valid():
            return doc
        if doc.naam is not None:
            doc.naam_keyword = cls.create_name_keyword(str(doc.naam))
        if doc.datum is not None:
            doc.jaar = cls.create_year(str(doc.datum))
        return doc

    def is_valid(self):
        # At the end of a file there may be empty lines, skip them.
        if getattr(self.meta, 'id', None) is None:
            return False
        # Skip row if there is no data except an id. This happens a lot at
        # the end of a file.
        if self.naam is None and self.datum is None:
            return False
        return True

    @staticmethod
    def parse_entry(entry: str) -> Optional[str]:
        return entry.strip() or None

    @staticmethod
    def create_name_keyword(naam: str) -> str:
        """Get a single keyword from the name field."""
        # todo: fix this one: Albrecht (St), van
        if len(naam.split(',')) >= 2:
            return naam.split(',')[0]
        elif len(naam.split('~')) >= 2:
            return naam.split('~')[0]
        elif len(naam.split(' ')) >= 2:
            return naam.split(' ')[0]
        else:
            return naam

    @staticmethod
    def create_year(datum: str) -> Optional[int]:
        """Parse a year from the datum field."""
        if datum is None or len(datum) < 4 or not datum[:4].isdigit():
            return None
        jaar = int(datum[:4])
        if 1000 < jaar < 2000:
            return jaar
        return None
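
# Usage sketch (not from the original source): loading one CSV row in the
# column order from_csv_line() expects (id, datum, naam, inhoud, bron,
# getuigen, bijzonderheden). The row contents are invented.
def _example_card_from_csv():
    row = ["12", "1654 maart 3", "Jansen, Pieter", "verkoop van een huis",
           "ORA Leiden 12", "", ""]
    card = CardNameIndex.from_csv_line(row)
    if card.is_valid():
        card.save()  # naam_keyword == "Jansen", jaar == 1654
    return card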
class DocTestSSLResult(DocType):
    class Meta:
        doc_type = "TestSSLResult"

    source = String(fields={'raw': String(index='not_analyzed')})
    result = Boolean()
    timestamp = Date()
    ip = String(index='not_analyzed')
    hostname = String(index='not_analyzed')
    port = Integer()
    svcid = String(index='not_analyzed')
    protocols = String(index='not_analyzed', multi=True)
    ciphers = String(multi=True, fields={'raw': String(index='not_analyzed')})
    ciphertests = String(index='not_analyzed', multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": String(index='not_analyzed'),
            "cipher": String(fields={'raw': String(index='not_analyzed')}),
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": String(fields={'raw': String(index='not_analyzed')}),
            "md5_fingerprint": String(index='not_analyzed'),
            "sha1_fingerprint": String(index='not_analyzed'),
            "sha256_fingerprint": String(index='not_analyzed'),
            "cn": String(fields={'raw': String(index='not_analyzed')}),
            "san": String(multi=True,
                          fields={'raw': String(index='not_analyzed')}),
            "issuer": String(fields={'raw': String(index='not_analyzed')}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": String(fields={'raw': String(index='not_analyzed')}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = String(index='not_analyzed', multi=True)

    def parseCSVLine(self, line):
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:
            # host, ip and port
            m = reIpHostColumn.search(line['host'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):
            # protocols
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):
            # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif (reCipherTests.search(line['id'])
              and reVulnerable.search(line['finding'])):
            # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))
        elif line['id'] == "order":
            # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['finding']))
        elif line['id'] == "order_proto":
            # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "order_cipher":
            # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "key_size":
            # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "algorithm":
            # certificate signature algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "fingerprint":
            # certificate fingerprints
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cn":
            # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "san":
            # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                sans = m.group(1)
                for san in sans.split(" "):
                    if san != "--":
                        self.cert.san.append(san)
        elif line['id'] == "issuer":
            # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":
            # certificate extended validation
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "expiration":
            # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(
                    unparsedDate, "%Y-%m-%d %H:%M %z")
        elif line['id'] == "ocsp_uri":
            # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"
        elif line['id'] == "ocsp_stapling":
            # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(
                reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "ccs", "secure_renego",
                            "sec_client_renego", "crime", "breach",
                            "poodle_ssl", "fallback_scsv", "freak", "DROWN",
                            "logjam", "beast",
                            "rc4") and reVulnerable.search(line['finding']):
            self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(
                    m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(
            csvfile, fieldnames=("id", "host", "port", "severity", "finding"),
            delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            self.result = False

        if 'debug' in kwargs and kwargs['debug']:
            pp.pprint(self.to_dict())
        return super().save()