def handle(self, *args, **options):
    try:
        file_path = args[0]
        id_prefix = args[1]
    except IndexError:
        raise CommandError(
            'First argument must be a source file and second is an id prefix')

    groups = defaultdict(list)
    with open(file_path, 'r', newline='', encoding='utf-8') as source:
        reader = csv.DictReader(source, delimiter=',')
        counter = 0

        for row in reader:
            status_col = 'Status' if 'Status' in row else 'Статус'
            if row[status_col] == '' or row[status_col] == 'Ок':
                groups[row[self._group_column(row)]].append(row)
                counter += 1

        self.stdout.write(
            'Read {} valid rows from the input file'.format(counter))

    Declaration.init()  # Apparently this is required to init mappings
    declarations = map(self.merge_group, groups.values())

    counter = 0
    for declaration in declarations:
        mapped = self.map_fields(declaration, id_prefix)

        res = Declaration.search().filter(
            Term(general__last_name=mapped['general']
                 ['last_name'].lower().split('-')) &
            Term(general__name=mapped['general']
                 ['name'].lower().split('-')) &
            Term(intro__declaration_year=mapped['intro']
                 ['declaration_year']))

        if mapped['general']['patronymic']:
            res = res.filter(
                Term(general__patronymic=mapped['general']
                     ['patronymic'].lower()))

        res = res.execute()

        if res.hits:
            self.stdout.write("%s (%s) already exists" % (
                mapped['general']['full_name'],
                mapped['intro']['declaration_year']))
            mapped['_id'] = res.hits[0]._id

        item = Declaration(**mapped)
        item.save()
        counter += 1

    self.stdout.write(
        'Loaded {} items to persistence storage'.format(counter))
def sitemap(request):
    # TODO: REFACTOR ME?
    urls = [
        reverse("wagtail_serve", args=[""]),
        reverse("wagtail_serve", args=["about/"]),
        reverse("wagtail_serve", args=["api/"]),
        reverse("regions_home"),
    ]

    search = Declaration.search().params(search_type="count")
    search.aggs.bucket(
        'per_region', 'terms', field='general.post.region', size=0)

    for r in search.execute().aggregations.per_region.buckets:
        urls.append(reverse("region", kwargs={"region_name": r.key}))

        subsearch = Declaration.search()\
            .filter(
                Term(general__post__region=r.key) &
                Not(Term(general__post__office='')))\
            .params(search_type="count")
        subsearch.aggs.bucket(
            'per_office', 'terms', field='general.post.office', size=0)

        for subr in subsearch.execute().aggregations.per_office.buckets:
            urls.append(
                reverse("region_office", kwargs={
                    "region_name": r.key,
                    "office_name": subr.key
                }))

    search = Declaration.search().params(search_type="count")
    search.aggs.bucket(
        'per_office', 'terms', field='general.post.office', size=0)

    for r in search.execute().aggregations.per_office.buckets:
        urls.append(reverse("office", kwargs={"office_name": r.key}))

    search = Declaration.search().extra(fields=[], size=100000)
    for r in search.execute():
        urls.append(reverse("details", kwargs={"declaration_id": r._id}))

    return render(request, "sitemap.jinja", {"urls": urls},
                  content_type="application/xml")
def handle(self, *args, **options):
    try:
        file_path = args[0]
    except IndexError:
        raise CommandError('First argument must be a source file')

    with open(file_path, 'r', newline='', encoding='utf-8') as source:
        decls = json.load(source)

    counter = 0
    Declaration.init()  # Apparently this is required to init mappings

    for row in decls:
        if "fields" not in row["details"]:
            continue

        mapped = self.map_fields(row)

        res = Declaration.search().filter(
            Term(general__last_name=mapped['general']
                 ['last_name'].lower().split('-')) &
            Term(general__name=mapped['general']
                 ['name'].lower().split('-')) &
            Term(intro__declaration_year=int(mapped['intro']
                 ['declaration_year'])))

        if mapped['general']['patronymic']:
            res = res.filter(
                Term(general__patronymic=mapped['general']
                     ['patronymic'].lower()))

        self.stdout.write("Checking %s (%s)" % (
            mapped['general']['full_name'],
            mapped['intro']['declaration_year']))

        res = res.execute()

        if not res.hits:
            item = Declaration(**mapped)
            item.save()
            counter += 1
        else:
            self.stdout.write("%s (%s) already exists" % (
                mapped['general']['full_name'],
                mapped['intro']['declaration_year']))

    self.stdout.write(
        'Loaded {} items to persistence storage'.format(counter))
def region(request, region_name): search = Declaration.search()\ .filter( Term(general__post__region=region_name) & Not(Term(general__post__office='')))\ .params(search_type="count") search.aggs.bucket('per_office', 'terms', field='general.post.office', size=0) res = search.execute() return { 'facets': res.aggregations.per_office.buckets, 'region_name': region_name }
def groups_filter_from_query(query, field_map={}):
    """Creates an F object for the groups of a search query."""
    f = None

    # filter groups
    for group in query.get("groups", []):
        group_f = MatchAll()

        for condition in group.get("conditions", []):
            field_name = condition["field"]
            field_name = field_map.get(field_name, field_name)
            operation = condition["type"]
            values = condition["values"]

            if values:
                values = [v["value"] for v in values]

                if operation == "all":
                    # NOTE: is there a better way to express this?
                    for value in values:
                        if "." in field_name:
                            path = field_name.split(".")[0]
                            group_f &= Nested(
                                path=path, filter=Term(**{field_name: value}))
                        else:
                            group_f &= Term(**{field_name: value})
                elif operation == "any":
                    if "." in field_name:
                        path = field_name.split(".")[0]
                        group_f &= Nested(
                            path=path, filter=Terms(**{field_name: values}))
                    else:
                        group_f &= Terms(**{field_name: values})
                elif operation == "none":
                    if "." in field_name:
                        path = field_name.split(".")[0]
                        group_f &= ~Nested(
                            path=path, filter=Terms(**{field_name: values}))
                    else:
                        group_f &= ~Terms(**{field_name: values})

        date_range = group.get("time")
        if date_range:
            group_f &= date_range_filter(date_range)

        if f:
            f |= group_f
        else:
            f = group_f

    return f
def get_condition_filter(condition, field_map={}):
    """
    Return the appropriate filter for a given group condition.

    # TODO: integrate this into groups_filter_from_query function.
    """
    field_name = condition.get("field")
    field_name = field_map.get(field_name, field_name)
    operation = condition["type"]
    values = condition["values"]
    condition_filter = MatchAll()

    if values:
        values = [v["value"] for v in values]

        if operation == "all":
            for value in values:
                if "." in field_name:
                    path = field_name.split(".")[0]
                    condition_filter &= Nested(
                        path=path, filter=Term(**{field_name: value}))
                else:
                    condition_filter &= Term(**{field_name: value})
        elif operation == "any":
            if "." in field_name:
                path = field_name.split(".")[0]
                condition_filter &= Nested(
                    path=path, filter=Terms(**{field_name: values}))
            else:
                condition_filter &= Terms(**{field_name: values})
        elif operation == "none":
            if "." in field_name:
                path = field_name.split(".")[0]
                condition_filter &= ~Nested(
                    path=path, filter=Terms(**{field_name: values}))
            else:
                condition_filter &= ~Terms(**{field_name: values})
        else:
            raise ValueError(
                """ES conditions must be one of the following values:
                ['all', 'any', 'none']""")

    return condition_filter
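# A minimal, hypothetical usage sketch (not part of the original code) showing
# the query structure that groups_filter_from_query and get_condition_filter
# above expect, assuming the filter classes used above (Term, Terms, Nested,
# MatchAll) are importable from elasticsearch_dsl. Field names, values and the
# field_map entry below are illustrative only.
example_query = {
    "groups": [
        {
            "conditions": [
                # "any" becomes a Terms filter (wrapped in Nested when the
                # field name is dotted, e.g. "tags.slug" -> path "tags")
                {"field": "tags.slug", "type": "any",
                 "values": [{"value": "politics"}, {"value": "economy"}]},
                # "none" becomes a negated Terms filter
                {"field": "status", "type": "none",
                 "values": [{"value": "draft"}]},
            ],
        },
    ],
}

# field_map translates externally exposed field names to index field names.
group_filter = groups_filter_from_query(
    example_query, field_map={"status": "status.raw"})
single_filter = get_condition_filter(
    example_query["groups"][0]["conditions"][1],
    field_map={"status": "status.raw"})
# Either result can then be applied to a search, e.g. some_search.filter(group_filter).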
def region(request, region_name): search = Declaration.search()\ .filter( Term(general__post__region=region_name) & Not(Term(general__post__office='')))\ .params(search_type="count") meta_data = MetaData.objects.filter(region_id=region_name, office_id=None).first() search.aggs.bucket('per_office', 'terms', field='general.post.office', size=0) res = search.execute() return { 'facets': res.aggregations.per_office.buckets, 'region_name': region_name, 'title': meta_data.title if meta_data else "", 'meta_desc': meta_data.description if meta_data else "", }
def handle(self, *args, **options):
    try:
        file_path = args[0]
    except IndexError:
        raise CommandError('First argument must be a source file')

    with open(file_path, 'r', newline='', encoding='utf-8') as source:
        reader = csv.DictReader(source, delimiter=";")
        counter = 0
        Declaration.init()  # Apparently this is required to init mappings

        for row in reader:
            mapped = self.map_fields(row)

            res = Declaration.search().filter(
                Term(general__last_name=mapped["general"]
                     ["last_name"].lower().split("-")) &
                Term(general__name=mapped["general"]
                     ["name"].lower().split("-")) &
                Term(intro__declaration_year=mapped["intro"]
                     ["declaration_year"]))

            if mapped["general"]["patronymic"]:
                res = res.filter(
                    Term(general__patronymic=mapped["general"]
                         ["patronymic"].lower()))

            res = res.execute()

            if res.hits:
                mapped["_id"] = res.hits[0]._id

            item = Declaration(**mapped)
            item.save()
            counter += 1

        self.stdout.write(
            'Loaded {} items to persistence storage'.format(counter))
def get_queryset(self):
    return super(RSSView, self).get_queryset().filter(
        # Exclude all SuperFeatures (until we ever decide to support them)
        Not(filter=Type(
            value=get_superfeature_model().search_objects.mapping.doc_type))
    ).filter(Term(**{'hide_from_rss': False}))
def InstantArticle(active=True):  # noqa
    return Nested(path="feature_type",
                  filter=Term(**{"feature_type.instant_article": active}))
def Status(status):  # noqa
    if status:
        return Term(status=status)
    else:
        return MatchAll()
def Evergreen(evergreen=True):
    return Term(evergreen=evergreen)
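# Hypothetical combination sketch (not part of the original code): these small
# factory functions return elasticsearch_dsl filter objects, so they compose
# with the usual &, | and ~ operators before being applied to a search. The
# status value "published" and `some_search` below are assumptions made for
# illustration only.
combined_filter = InstantArticle(True) & Status("published") & ~Evergreen()
# e.g. some_search.filter(combined_filter).execute(), where `some_search` is
# any elasticsearch_dsl Search over the content index.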