Пример #1
0
    def write_es_geo(self,
                     es_host='http://localhost:9200/',
                     index_name="geos",
                     doc_type='user_geos'):
        """Bulk-index the records in self.userGeos into Elasticsearch.

        Each record is expected to be (uid, lat, lon); the uid doubles as
        the document id, so re-running overwrites instead of duplicating.

        es_host    -- base URL of the Elasticsearch server
        index_name -- index to write into
        doc_type   -- mapping/type name for the documents
        """
        # BUG FIX: the original hard-coded localhost here, silently
        # ignoring the es_host parameter; honor the caller-supplied host.
        es = ElasticSearch(es_host)

        ## uncomment the following code to prompt check
        # print "Will delete all the doc in the [index:type] from ElasticSearch:"
        # print index_name, ":", doc_type
        # confirm = raw_input("Sure?(y/n)")
        # if confirm!="y":
        # 	sys.exit(0)

        # create_es() presumably resets the index (defined elsewhere --
        # TODO confirm); failures are reported but not fatal.
        try:
            create_es()
        except Exception as e:
            print "Error", e
        else:
            print index_name, ":", doc_type, " deleted!"
        # Build one document per record: uid plus a geo_point location.
        documents = []
        for record in self.userGeos:
            doc = {
                'uid': int(record[0]),
                'location': {
                    'lat': record[1],
                    'lon': record[2]
                }
            }
            documents.append(doc)
        print "Bulk indexing", len(documents), "documents.."
        es.bulk_index(index_name, doc_type, documents, id_field='uid')
        es.refresh(index_name)
        # Sanity check: read back a page of documents and show a sample.
        print "results from ES,"
        query = {"from": 0, "size": 2000, 'query': {"match_all": {}}}
        res = es.search(query, index=index_name)
        print len(res['hits']['hits']), "documents found"
        print "sample result"
        print res['hits']['hits'][0]
Пример #2
0
	def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'):
		"""Bulk-index the records in self.userGeos into Elasticsearch.

		es_host    -- base URL of the Elasticsearch server
		index_name -- index to write into
		doc_type   -- mapping/type name for the documents
		"""
		# BUG FIX: was hard-coded to localhost, ignoring the es_host parameter.
		es = ElasticSearch(es_host)

		## uncomment the following code to prompt check
		# print "Will delete all the doc in the [index:type] from ElasticSearch:"
		# print index_name, ":", doc_type
		# confirm = raw_input("Sure?(y/n)")
		# if confirm!="y":
		# 	sys.exit(0)

		# create_es() presumably resets the index (defined elsewhere --
		# TODO confirm); failures are reported but not fatal.
		try:
			create_es()
		except Exception as e:
			print "Error", e
		else:
			print index_name,":", doc_type," deleted!"
		# One document per record: uid plus a geo_point location.
		documents = []
		for record in self.userGeos:
			doc = {'uid':int(record[0]), 'location':{'lat':record[1],'lon':record[2]}}
			documents.append(doc)
		print "Bulk indexing", len(documents),"documents.."
		es.bulk_index(index_name, doc_type, documents, id_field='uid')
		es.refresh(index_name)
		# Read back a sample page to confirm the bulk index worked.
		print "results from ES,"
		query = {
			"from" : 0, "size" : 2000,
			'query': {
				 "match_all" : { }
			 }
		 }
		res =  es.search(query, index=index_name)
		print len(res['hits']['hits']), "documents found"
		print "sample result"
		print res['hits']['hits'][0]
Пример #3
0
                "order": "asc",
                "unit": "km"
            }
        }]
    }
    # Query: match everything, sorted by "likes" descending, ties broken
    # by relevance score. NOTE(review): built but never executed below.
    query1 = {
        "from": 0,
        "size": 10,
        'query': {
            "match_all": {}
        },
        "sort": [{
            "likes": {
                "order": "desc"
            }
        }, "_score"],
    }
    # Facet query: count documents per _type via the "facets" API.
    query_count = {"facets": {"count_by_type": {"terms": {"field": "_type"}}}}
    # res =  es.search(query, index='photo_geos',doc_type=['photos'])
    res = es.search(query_count, index='geos', doc_type=['user_geos'])
    print res
    sys.exit(0)  # NOTE(review): everything below this exit is dead code
    
    # Flatten each hit into (id, sort-key, views, likes, lat, lon).
    uids = [
        (r['_id'], r['sort'][0], r['_source']['views'], r['_source']['likes'],
         r['_source']['location']['lat'], r['_source']['location']['lon'])
        for r in res['hits']['hits']
    ]
    print len(uids)
    for i in range(len(uids)):
        print uids[i]
Пример #4
0
class Collation:
    """Collates Elasticsearch monitoring documents by document type.

    For every supported doctype the class knows how to look documents up
    (a Query) and how to fold each _source field into a summary value
    (a table of Aggregators).
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        # doctype -> {"lookup": Query, "action": {field: Aggregator}}
        self.datadict = {
            "prc-out": {
                "lookup": Query("prc-out", "source"),
                "action": {
                    "definition": Aggregator("drop"),
                    "data": Aggregator({
                        "in": Aggregator("add"),
                        "out": Aggregator("add"),
                        "file": Aggregator("cat"),
                    }),
                    "ls": Aggregator("check"),
                    "stream": Aggregator("check"),
                    "source": Aggregator("match"),
                },
            },
            "prc-in": {
                "lookup": Query("prc-in", "dest"),
                "action": {
                    "definition": Aggregator("drop"),
                    "data": Aggregator({"out": Aggregator("add")}),
                    "ls": Aggregator("check"),
                    "index": Aggregator("cat"),
                    "source": Aggregator("check"),
                    "dest": Aggregator("check"),
                    "process": Aggregator("cat"),
                },
            },
            "prc-s-state": {
                "lookup": Query("prc-s-state"),
                "action": {
                    "macro": Aggregator("histoadd"),
                    "mini": Aggregator("histoadd"),
                    "micro": Aggregator("histoadd"),
                    "tp": Aggregator("add"),
                    "lead": Aggregator("avg"),
                    "nfiles": Aggregator("add"),
                    "ls": Aggregator("check"),
                    "process": Aggregator("cat"),
                },
            },
        }

    def lookup(self, doctype):
        """Return the Query used to fetch documents of *doctype*."""
        return self.datadict[doctype]["lookup"]

    def action(self, doctype):
        """Return the field -> Aggregator table for *doctype*."""
        return self.datadict[doctype]["action"]

    def search(self, ind, doctype, ls, stream=None):
        """Run the doctype's lookup query against index *ind*."""
        query_args = (ls, stream) if stream else (ls,)
        return self.server.search(self.lookup(doctype)(*query_args), index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Fold all matching hits into one summary dict, then reset the table."""
        hits = self.search(ind, doctype, ls, stream)
        actions = self.action(doctype)
        for hit in hits["hits"]["hits"]:
            for field, value in hit["_source"].items():
                actions[field](value)
        summary = dict((field, agg.value()) for field, agg in actions.items())
        for agg in actions.values():
            agg.reset()
        return summary

    def refresh(self, ind):
        """Refresh index *ind* so recent writes become searchable."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Index *doc* under (*ind*, *doctype*) and return the ES response."""
        return self.server.index(ind, doctype, doc)
Пример #5
0
    # Connect to a local Elasticsearch and parse CLI arguments:
    # lat, lon (degrees) and search radius r (km).
    es = ElasticSearch("http://localhost:9200/")
    lat = float(sys.argv[1])
    lon = float(sys.argv[2])
    r = float(sys.argv[3])
    print lat, lon, r
    # Geo query: everything within r km of (lat, lon), nearest first.
    # NOTE(review): built but never executed -- only query_count is used below.
    query = {
        "from": 0,
        "size": 10,
        "query": {"match_all": {}},
        "filter": {"geo_distance": {"distance": str(r) + "km", "location": {"lat": lat, "lon": lon}}},
        "sort": [{"_geo_distance": {"location": {"lat": lat, "lon": lon}, "order": "asc", "unit": "km"}}],
    }
    # Alternative query: top documents ordered by likes, then relevance.
    query1 = {"from": 0, "size": 10, "query": {"match_all": {}}, "sort": [{"likes": {"order": "desc"}}, "_score"]}
    # Facet query: count documents per _type via the "facets" API.
    query_count = {"facets": {"count_by_type": {"terms": {"field": "_type"}}}}
    # res =  es.search(query, index='photo_geos',doc_type=['photos'])
    res = es.search(query_count, index="geos", doc_type=["user_geos"])
    print res
    sys.exit(0)  # NOTE(review): everything below this exit is dead code

    # Flatten each hit into (id, sort-key, views, likes, lat, lon).
    uids = [
        (
            r["_id"],
            r["sort"][0],
            r["_source"]["views"],
            r["_source"]["likes"],
            r["_source"]["location"]["lat"],
            r["_source"]["location"]["lon"],
        )
        for r in res["hits"]["hits"]
    ]
    print len(uids)
Пример #6
0
class Collation:
    """Collates monitoring documents stored in Elasticsearch.

    ``datadict`` maps each document type to the Query that finds its
    documents and to one Aggregator per _source field that folds matching
    hits into a single summary record.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        # Per-field fold rules for each document type.
        out_actions = {
            'definition': Aggregator('drop'),
            'data': Aggregator({'in': Aggregator('add'),
                                'out': Aggregator('add'),
                                'file': Aggregator('cat')}),
            'ls': Aggregator('check'),
            'stream': Aggregator('check'),
            'source': Aggregator('match'),
        }
        in_actions = {
            'definition': Aggregator('drop'),
            'data': Aggregator({'out': Aggregator('add')}),
            'ls': Aggregator('check'),
            'index': Aggregator('cat'),
            'source': Aggregator('check'),
            'dest': Aggregator('check'),
            'process': Aggregator('cat'),
        }
        state_actions = {
            'macro': Aggregator('histoadd'),
            'mini': Aggregator('histoadd'),
            'micro': Aggregator('histoadd'),
            'tp': Aggregator('add'),
            'lead': Aggregator('avg'),
            'nfiles': Aggregator('add'),
            'ls': Aggregator('check'),
            'process': Aggregator('cat'),
        }
        self.datadict = {
            'prc-out': {"lookup": Query('prc-out', 'source'), "action": out_actions},
            'prc-in': {"lookup": Query('prc-in', 'dest'), "action": in_actions},
            'prc-s-state': {"lookup": Query('prc-s-state'), "action": state_actions},
        }

    def lookup(self, doctype):
        """Query object that fetches documents of the given type."""
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        """Field-to-Aggregator table for the given type."""
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Execute the type's lookup query against index *ind*."""
        if stream:
            query = self.lookup(doctype)(ls, stream)
        else:
            query = self.lookup(doctype)(ls)
        return self.server.search(query, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Aggregate every matching hit, return the summary, reset state."""
        table = self.action(doctype)
        for hit in self.search(ind, doctype, ls, stream)['hits']['hits']:
            for field, value in hit['_source'].items():
                table[field](value)
        summary = dict((field, agg.value()) for field, agg in table.items())
        for agg in table.values():
            agg.reset()
        return summary

    def refresh(self, ind):
        """Make recent writes to index *ind* searchable."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Store *doc* in Elasticsearch and return the response."""
        return self.server.index(ind, doctype, doc)
Пример #7
0
class Collation:
    """Summarizes Elasticsearch monitoring documents per document type.

    Each entry of ``datadict`` pairs a lookup Query with the per-field
    Aggregators that reduce matching documents to a single record.
    """

    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            'prc-out': {
                "lookup": Query('prc-out', 'source'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'in': Aggregator('add'),
                                        'out': Aggregator('add'),
                                        'file': Aggregator('cat')}),
                    'ls': Aggregator('check'),
                    'stream': Aggregator('check'),
                    'source': Aggregator('match'),
                },
            },
            'prc-in': {
                "lookup": Query('prc-in', 'dest'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({'out': Aggregator('add')}),
                    'ls': Aggregator('check'),
                    'index': Aggregator('cat'),
                    'source': Aggregator('check'),
                    'dest': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
            'prc-s-state': {
                "lookup": Query('prc-s-state'),
                "action": {
                    'macro': Aggregator('histoadd'),
                    'mini': Aggregator('histoadd'),
                    'micro': Aggregator('histoadd'),
                    'tp': Aggregator('add'),
                    'lead': Aggregator('avg'),
                    'nfiles': Aggregator('add'),
                    'ls': Aggregator('check'),
                    'process': Aggregator('cat'),
                },
            },
        }

    def lookup(self, doctype):
        """Return the stored Query for *doctype*."""
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        """Return the Aggregator table for *doctype*."""
        return self.datadict[doctype]['action']

    def search(self, ind, doctype, ls, stream=None):
        """Fetch the raw ES response for *doctype* documents in index *ind*."""
        finder = self.lookup(doctype)
        body = finder(ls, stream) if stream else finder(ls)
        return self.server.search(body, index=ind)

    def collate(self, ind, doctype, ls, stream=None):
        """Reduce all matching documents to {field: value}; reset aggregators."""
        response = self.search(ind, doctype, ls, stream)
        table = self.action(doctype)
        for document in response['hits']['hits']:
            for name, raw in document['_source'].items():
                table[name](raw)
        result = dict((name, agg.value()) for name, agg in table.items())
        for agg in table.values():
            agg.reset()
        return result

    def refresh(self, ind):
        """Refresh the index so freshly stashed documents are visible."""
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        """Write *doc* into (*ind*, *doctype*); returns the ES index response."""
        return self.server.index(ind, doctype, doc)