Example #1
def query_imagesi(classnum_list, classval_list, upfilename):

    #print("In query_imagesi")
    hit1 = set()
    image_set = set()
    #print("11. classnum_list =", classnum_list)
    #print("12. classval_list =", classval_list)
    if len(classnum_list) > 7:
        # use only the first 7 classes; similarityClass below has a limited number of slots
        classnum_list = classnum_list[:7]
        classval_list = classval_list[:7]

    QI = Q('match_all')
    s1 = Search(index='adknum')
    #s1 = Search(index='vgnum')
    classn = 1
    for class_num in classnum_list:
        if classn > 7:  # query on at most the first 7 class numbers
            break
        classn = classn + 1
        QI = QI & Q('bool', must=[Q("match", classnum=class_num)])

    s1 = s1.query(QI).using(client)
    response = s1.execute()
    hit_num = 0
    simDict = {}
    similarityClass = np.zeros(15)
    for hit in s1.scan():
        print("123. hit.classnum: ", hit.classnum)
        print("124. hit.classval: ", hit.classval)
        lenimgclassnum = len(hit.classnum)
        simDict[hit.imgfile] = 1.0
        #similarityClass[lenimgclassnum]
        # Compute similarity and choose top 4 rather than random 4
        ii = 0
        similarityImg = 0.0
        for classi in classnum_list:
            jj = 0
            for classj in hit.classnum:
                if classi == int(classj):
                    similarityClass[ii] = classval_list[ii] / (abs(
                        (classval_list[ii] - int(hit.classval[jj]))) + 10)
                    #print("144. similarityClass[ii] = ", similarityClass[ii])
                    similarityImg = similarityImg + similarityClass[ii]
                    break
                jj = jj + 1
            simDict[hit.imgfile] = similarityImg
            #print("130. simDict[hit.imgfile] = ", simDict[hit.imgfile], similarityImg)
            ii = ii + 1

    #for key in sorted(simDict.keys(), reverse=True) :
    kk = 0
    for img in sorted(simDict, key=simDict.get, reverse=True):
        print("140. ", img, simDict[img])
        image_set.add(img)
        #image_set.add(hit.imgfile)
        #pick top 4 images (hit_nums)
        kk = kk + 1
        if kk > 3:
            break

    return display_image_set(image_set, upfilename, '')
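Example #1 (and the other query_images* snippets below) depends on module-level objects the excerpts never define: the Elasticsearch client, the np alias, and helpers such as display_image_set. A minimal sketch of that assumed setup, with a placeholder host and a stub helper, might look like this:

# Hypothetical shared setup for the query_images* snippets; only the use of
# client, np, Q and Search is taken from the examples themselves.
import numpy as np
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Q, Search

client = Elasticsearch("http://localhost:9200")  # assumed local cluster

def display_image_set(image_set, upfilename, object_list):
    # Stub: the real helper renders the selected images in the web UI.
    return sorted(image_set)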
Example #2
def query_images(object_list):
    #print("In query_images")
    hit1 = set()
    image_set = set()
    print("11. object_list =", object_list)

    QI = Q('match_all')
    #s1 = Search(index='bvgobjs_index')
    s1 = Search(index='idxo20')
    for objectk in object_list:
        print("objectk= ", objectk)
        QI = QI & Q("match", names=objectk)

    s1 = s1.query(QI).using(client)
    response = s1.execute()
    for hit in s1.scan():
        print("33 ", hit.imgfile)
        image_set.add(hit.imgfile)

    print("image_set = {0}".format(image_set))
    im = 0
    #app.layout = serve_layout
    images_div = []
    for image in image_set:
        if im > 3:  # display at most 4 images
            break
        file, ext = os.path.splitext(image)
        image = file + '.png'  # swap the stored extension for .png
        print("66 image =", image)
        images_div.append(display_image(image))
        im = im + 1
    print("Please hit refresh...")
    # Here call callback -
    #serve_layout =
    app.layout = serve_layout(images_div)
Example #3
    def get(self, request):
        database_name = get_database_name(request.user)
        search_text = lower(request.GET["term"] or "")
        es = Elasticsearch(hosts=[{
            "host": ELASTIC_SEARCH_HOST,
            "port": ELASTIC_SEARCH_PORT
        }])
        search = Search(using=es, index=database_name, doc_type="reporter")
        search = search.extra(**{"size": "10"})
        resp = []
        if search_text:
            query_text_escaped = ElasticUtilsHelper().replace_special_chars(
                search_text)
            query_fields = [
                "name", "name_value", "name_exact", "short_code",
                "short_code_exact", "short_code_value"
            ]
            search = search.query("query_string",
                                  query=query_text_escaped,
                                  fields=query_fields)
            search_results = search.execute()
            resp = [{
                "id": result.short_code,
                "label": self.get_label(result)
            } for result in search_results.hits]
        return HttpResponse(json.dumps(resp))
Example #4
def query_imageso(object_list):
    print("In query_imageso")
    hit1 = set()
    image_set = set()
    print("11. object_list =", object_list)

    QI = Q('match_all')
    s1 = Search(index='idx0')
    for name in object_list:
        print("name= ", name)
        QI = QI & Q("match", names=name)

    s1 = s1.query(QI).using(client)
    response = s1.execute()
    for hit in s1.scan():
        image_set.add(hit.imgfile)

    return display_image_set(image_set, None, object_list)
Example #5
def create_search(
    must: list = None,
    should: list = None,
    filter_: list = None,
    must_not: list = None,
    source: dict = None,
    sort=None,
) -> Search:
    """
    Search index by construct query.

    Kwargs:
        must: list of the must satisfied query
        should: list of the should satisfied query
        sort: sort statement

    Return:
        Search object.
    """
    s = Search(index=INDEX)

    match_all = Q("match_all")

    must = must + [match_all] if must else [match_all]
    should = should if should else []
    filter_ = filter_ if filter_ else []
    must_not = must_not if must_not else []

    s = s.query("bool",
                must=must,
                should=should,
                filter=filter_,
                must_not=must_not)

    if sort:
        s = s.sort(sort)

    if source:
        s = s.source(**source)

    print(f"Query: {json.dumps(s.to_dict())}")

    return s
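A possible call site for create_search, assuming the client from the setup sketch above and an INDEX constant defined alongside the function; the field names status, created_at and title are illustrative only:

# Illustrative usage; status, created_at and title are made-up field names.
s = create_search(
    must=[Q("term", status="published")],
    filter_=[Q("range", created_at={"gte": "2020-01-01"})],
    sort="-created_at",
    source={"includes": ["title", "created_at"]},
)
for hit in s.using(client).execute():
    print(hit.meta.id, hit.title)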
Example #6
def query_imagesi(classnum_list, upfilename):  #Disabled --
    #print("In query_imagesi")
    hit1 = set()
    image_set = set()
    #print("11. classnum_list =", classnum_list)

    QI = Q('match_all')
    s1 = Search(index='vgnum')
    classn = 1
    for class_num in classnum_list:
        if classn > 7:  #can make this 7--
            break
        classn = classn + 1
        print("class_num= ", class_num)
        QI = QI & Q('bool', must=[Q("match", classnum=class_num)])

    s1 = s1.query(QI).using(client)
    response = s1.execute()
    for hit in s1.scan():
        image_set.add(hit.imgfile)
    return display_image_set(image_set, upfilename, '')
Example #7
	def post(self):
		ts = self.args['_']
		if abs(int(time() * 1000) - int(ts)) > 1800000:  # reject timestamps more than 30 minutes off
			return {'success':0, 'message': '时间戳无效'}, 200
		token = self.args['token']
		appkey = self.args['appkey']
		verify_token = flask_redis.get(appkey)
		if verify_token is None:
			return {'success': 0, 'message': 'token 无效'}, 200
		else:
			verify_token = verify_token.decode('utf-8') if isinstance(verify_token, bytes) else verify_token
			if verify_token != token:
				return {'success': 0, 'message': 'token 无效'}, 200
		sign = self.args['sign']
		if hash_sha256("{0},{1},{2}".format(ts, token, appkey)) != sign:
			return {'success': 0, 'message': 'sign 无效'}, 200
		page = int(self.args['page'])
		size = int(self.args['size'])
		from_size = (page - 1) * size
		to_size = page * size
		channel = self.args['channel']
		category = self.args['category']
		location = self.args['location']
		times = self.args['times']
		and_ = self.args['and']
		sort_field = self.args['s']
		is_open = self.args['is_open']
		o = self.args['o']
		scope = self.args['scope']
		if scope in ['content', 'tag', 'title', 'description', 'author', 'writings']:
			scope = [scope]
		else:
			scope = ['content', 'tag', 'title', 'description', 'author', 'writings']
		keyword = self.args['keyword']
		try:
			s = Search(using=client, index='gdszx', doc_type='culture')
			if times:
				s = s.filter('term', times=times)
			if category:
				s = s.filter('term', category=category)
			if location:
				s = s.filter('term', location=location)
			if channel:
				s = s.filter('term', channel=channel)
			if is_open == '0':
				s = s.filter('term', is_open=False)
			elif is_open == '1':
				s = s.filter('term', is_open=True)
			s = s.query('multi_match', query=keyword, fields=scope)
			if and_ is not None and and_.strip() != '':
				for word in split(r'\s+', and_.strip()):
					s = s.query('multi_match', query=word, fields=scope)
			s = s.highlight('title', fragment_size=50).highlight('content', fragment_size=100)
			s.aggs.bucket('times_all', 'terms', field='times', size=10)
			s.aggs.bucket('channel_all', 'terms', field='channel', size=10)
			s.aggs.bucket('category_all', 'terms', field='category', size=10)
			s.aggs.bucket('location_all', 'terms', field='location', size=10)
			s = s.sort(o+sort_field)
			s = s[from_size:to_size]
			response = s.execute()
			return {'success': 1, 'data': response.to_dict()}, 200
		except Exception as e:
			return {'success': 0, 'message': str(e)}, 200
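Examples #7 and #8 share the same chaining pattern; stripped of the request plumbing, the core is roughly the sketch below (the keyword, field and channel values are placeholders; only the index and doc_type come from Example #7):

# Placeholder values; only the chaining pattern mirrors the handlers above.
s = Search(using=client, index='gdszx', doc_type='culture')
s = s.filter('term', channel='news')                      # non-scoring filter
s = s.query('multi_match', query='keyword', fields=['title', 'content'])
s = s.highlight('title', fragment_size=50)
s.aggs.bucket('channel_all', 'terms', field='channel', size=10)  # aggs mutate in place
s = s.sort('-times')[0:10]                                # sort, then paginate with a slice
response = s.execute()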
Example #8
	def post(self):
		ts = self.args['_']
		if abs(int(time() * 1000) - int(ts)) > 1800000:  # reject timestamps more than 30 minutes off
			return {'success':0, 'message': '时间戳无效'}, 200
		token = self.args['token']
		appkey = self.args['appkey']
		verify_token = flask_redis.get(appkey)
		if verify_token is None:
			return {'success': 0, 'message': 'token 无效'}, 200
		else:
			verify_token = verify_token.decode('utf-8') if isinstance(verify_token, bytes) else verify_token
			if verify_token != token:
				return {'success': 0, 'message': 'token 无效'}, 200
		sign = self.args['sign']
		if hash_sha256("{0},{1},{2}".format(ts, token, appkey)) != sign:
			return {'success': 0, 'message': 'sign 无效'}, 200
		query = Website.query.join(Token, Website.id==Token.website_id).filter(Token.appkey == appkey).first()
		domain = query.domain
		page = int(self.args['page'])
		size = int(self.args['size'])
		from_size = (page - 1) * size
		to_size = page * size
		origin = self.args['origin']
		channel = self.args['channel']
		category = self.args['category']
		author = self.args['author']
		editor = self.args['editor']
		begin = self.args['from']
		to = self.args['to']
		has_pic = self.args['has_pic']
		has_video = self.args['has_video']
		v1 = self.args['v1']
		v2 = self.args['v2']
		v3 = self.args['v3']
		v4 = self.args['v4']
		v5 = self.args['v5']
		v6 = self.args['v6']
		not_ = self.args['not']
		and_ = self.args['and']
		sort_field = self.args['s']
		o = self.args['o']
		f = self.args['f']
		l = self.args['l']
		l = [] if l is None else l.split(',')
		scope = self.args['scope']
		if scope in ['content', 'tag', 'title', 'description']:
			scope = [scope]
		else:
			scope = ['content', 'tag', 'title', 'description']
		keyword = self.args['keyword']
		try:
			s = Search(using=client, index='common', doc_type='search')
			s = s.filter('term', website=domain)
			if author:
				s = s.filter('term', author=author)
			if editor:
				s = s.filter('term', editor=editor)
			if origin:
				s = s.filter('term', origin=origin)
			if category:
				s = s.filter('term', category=category)
			if channel:
				s = s.filter('term', channel=channel)
			if v1:
				s = s.filter('term', reserved_1=v1)
			if v2:
				s = s.filter('term', reserved_2=v2)
			if v3:
				s = s.filter('term', reserved_3=v3)
			if v4:
				s = s.filter('term', reserved_4=v4)
			if v5:
				s = s.filter('term', reserved_5=v5)
			if v6:
				s = s.filter('term', reserved_6=v6)
			if has_pic == '0':
				s = s.filter('term', has_pic=False)
			elif has_pic == '1':
				s = s.filter('term', has_pic=True)
			if has_video == '0':
				s = s.filter('term', has_video=False)
			elif has_video == '1':
				s = s.filter('term', has_video=True)
			s = s.filter('range', pdate={'gte': begin, 'lte': to})

			s = s.query('multi_match', query=keyword, fields=scope)
			if not_:
				s = s.exclude('multi_match', query=not_, fields=scope)
			if and_ is not None and and_.strip() != '':
				for word in split(r'\s+', and_.strip()):
					s = s.query('multi_match', query=word, fields=scope)
			s = s.highlight('title', fragment_size=50).highlight('content', fragment_size=100).highlight('tag', fragment_size=50).highlight('description', fragment_size=100)
			if f == 'title':
				s = s.exclude('terms', title__raw=l)
			elif f == 'url':
				s = s.exclude('terms', url=l)
			s = s.sort(o+sort_field)
			s = s[from_size:to_size]
			response = s.execute()
			related = related_search(client, keyword)
			return {'success': 1, 'data': response.to_dict(), 'related': related}, 200
		except Exception as e:
			return {'success': 0, 'message': str(e)}, 200
Example #9
    def put(self) -> dict:
        """
        search for a group in Elasticsearch

        Returns:
            dict -- search results
        """
        args = self.parser.parse_args()

        # init search
        search: Search = Group.search()

        search_query: dict = {
            "bool": {
                "should": [
                    {"query_string": {"query": args["query"], "fields": ["*"]}},
                    {
                        "nested": {
                            "path": "topics",
                            "score_mode": "avg",
                            "query": {
                                "bool": {
                                    "must": [
                                        {
                                            "query_string": {
                                                "query": args["query"],
                                                "fields": ["*"],
                                            }
                                        }
                                    ]
                                }
                            },
                        }
                    },
                    {
                        "nested": {
                            "path": "events",
                            "score_mode": "avg",
                            "query": {
                                "bool": {
                                    "must": [
                                        {
                                            "query_string": {
                                                "query": args["query"],
                                                "fields": ["*"],
                                            }
                                        }
                                    ]
                                }
                            },
                        }
                    },
                ],
                "must": [],
            }
        }

        # set event time filter
        if args["event_time_gte"] or args["event_time_lte"]:
            range_query: dict = {}
            if args["event_time_gte"]:
                range_query["gte"] = args["event_time_gte"]
            if args["event_time_lte"]:
                range_query["lte"] = args["event_time_lte"]

            search_query["bool"]["must"].append(
                {
                    "nested": {
                        "path": "events",
                        "score_mode": "avg",
                        "query": {
                            "bool": {"must": [{"range": {"events.time": range_query}}]}
                        },
                    }
                }
            )

        # set geo_distance filter
        if args["geo_distance"] and args["geo_lat"] and args["geo_lon"]:
            search_query["bool"]["must"].append(
                {
                    "nested": {
                        "path": "events",
                        "score_mode": "avg",
                        "query": {
                            "bool": {
                                "must": [
                                    {
                                        "geo_distance": {
                                            "distance": args["geo_distance"],
                                            "events.venue_location": {
                                                "lat": args["geo_lat"],
                                                "lon": args["geo_lon"],
                                            },
                                        }
                                    }
                                ]
                            }
                        },
                    }
                }
            )

        # pagination
        start_entry: int = args["page"] * args["limit"]
        end_entry: int = start_entry + args["limit"]
        search = search[start_entry:end_entry]

        # sort
        if args["sort"]:
            search = Search().sort(args["sort"])

        # execute search
        search = search.query(Q(search_query))

        # set highlight score
        search = search.highlight_options(order="score")  # highlight_options returns a copy

        # load response from elasticsearch
        results: Response = search.execute()

        # get response
        found_groups: List[dict] = []
        map_center_lat: float = 0
        map_center_lon: float = 0
        for group in results.hits:

            group_dict: dict = {}
            if isinstance(group, Hit):
                group_object = Group.get_group(urlname=group.to_dict()["urlname"])
                group_dict = group_object.to_json_dict(load_events=args["load_events"])
            else:
                group_dict = group.to_json_dict(load_events=args["load_events"])

            if "venue_location_average" in group_dict:
                map_center_lat = (
                    map_center_lat + group_dict["venue_location_average"]["lat"]
                )
                map_center_lon = (
                    map_center_lon + group_dict["venue_location_average"]["lon"]
                )
            else:
                map_center_lat = map_center_lat + group_dict["location"]["lat"]
                map_center_lon = map_center_lon + group_dict["location"]["lon"]

            # add group dict to array
            found_groups.append(
                {**group_dict,}
            )

        if len(found_groups) > 0:
            map_center_lat = map_center_lat / len(found_groups)
            map_center_lon = map_center_lon / len(found_groups)

        return {
            "results": found_groups,
            "hits": results.hits.total["value"],
            "map_center": {"lat": map_center_lat, "lon": map_center_lon},
        }
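The raw search_query dict in Example #9 can also be built from Q objects; a rough equivalent of its should clause, with 'some text' standing in for args["query"]:

# Sketch only: 'some text' stands in for args["query"] from Example #9.
text_q = Q("query_string", query="some text", fields=["*"])
nested_topics = Q("nested", path="topics", score_mode="avg",
                  query=Q("bool", must=[text_q]))
nested_events = Q("nested", path="events", score_mode="avg",
                  query=Q("bool", must=[text_q]))
search = Group.search().query(Q("bool", should=[text_q, nested_topics, nested_events]))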
Example #10
class Messenger:
    """
    Performs transformations on data

        eg. f(x) -> y

    Decoupled from the other factor network code,
    and can be swapped with other implementations
    """

    def __init__(self, config='cdr', size=2000):
        """
        :param config: str
            Name of the elasticsearch connection alias to use
        :param size: int
            Size limit to set on elasticsearch query
        """
        self.conn = connections.get_connection(config)
        self.elastic = Search('cdr', extra={'size': size})

    def match(self, match_type, **kwargs):
        return self.elastic.query(match_type, **kwargs).execute()

    @memoize
    def available(self, ad_id):
        """
        Gets the available factors for a particular ad

        :param ad_id: str
            Unique ad identifier

        :return: factors
        :rtype : list
        """
        accumulator = lambda x,y: x|y
        output      = self.match('match_phrase', _id=ad_id)
        keys        = [
            set(i['_source'].keys())
                for i in output.hits.hits
        ]
        return list(reduce(accumulator, keys, set()))

    def lookup(self, ad_id, field):
        """
        Get data from ad_id

        :param ad_id: str
            String to be queried
        """
        if not isinstance(ad_id, list):
            ad_id = [ad_id]

        results = self.elastic.query(Ids(values=ad_id)).execute()

        return set(flatten([
            hits['_source'][field] for hits in results.hits.hits
                if field in hits['_source']
        ]))


    def reverse_lookup(self, field, field_value):
        """
        Get ad_id from a specific field and search term

        :param field_value: str
            String to be queried
        """
        results = self.match(
            'match_phrase', **{field:field_value}).hits.hits

        if not results:
            results = self.match('match', _all=field_value).hits.hits

        return [hit['_id'] for hit in results]

    def suggest(self, ad_id, field):
        """
        The suggest function suggests other ad_ids that share this
        field with the input ad_id.
        """
        suggestions = {}
        field_values = self.lookup(ad_id, field)

        for value in field_values:
            ads = set(self.reverse_lookup(field, value))

            # To prevent cycles
            if isinstance(ad_id, list):
                ads -= set(ad_id)
            else:
                ads.discard(ad_id)
            suggestions[value] = list(ads)

        return suggestions
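A possible usage of Messenger, assuming an elasticsearch connection is registered under the 'cdr' alias; the host, the ad id and the phone field name below are made up:

# Hypothetical usage; the host, '1234abcd' and 'phone' are made-up values.
from elasticsearch_dsl import connections

connections.create_connection(alias='cdr', hosts=['http://localhost:9200'])
m = Messenger(config='cdr', size=100)
print(m.available('1234abcd'))          # fields present on that ad document
print(m.lookup('1234abcd', 'phone'))    # values of one field
print(m.suggest('1234abcd', 'phone'))   # other ads sharing those values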