Exemplo n.º 1
0
import sys
import os
from img_to_vec import Img2Vec
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from time import time
from saveload import save_obj
# Directory whose entries are all opened as images and vectorised.
input_path = './Styles'

img2vec = Img2Vec(model='alexnet')
a = []      # feature vectors, in os.listdir() order
names = []  # file names, parallel to `a`
# For each test image, we store the filename and vector as key, value in a dictionary
pics = {}
for file in os.listdir(input_path):
    filename = os.fsdecode(file)
    # Open through a context manager so the underlying file handle is
    # released as soon as the vector has been computed; the original left
    # one open handle per image until garbage collection.
    with Image.open(os.path.join(input_path, filename)) as img:
        vec = img2vec.get_vec(img)
    pics[filename] = vec
    a.append(vec)
    names.append(file)

# Persist the three views of the same data: name -> vector mapping,
# the bare vector list, and the matching list of names.
save_obj(pics, 'pics_vocab')
save_obj(a, 'vecs')
save_obj(names, 'names')
Exemplo n.º 2
0
def relative_queryset(all_queries, fname, trange, qassoc=None, loc='GB', verbose=False):
    """Compile relative popularity factors for a set of search queries.

    The work is done in three stages:

    1. Every query is requested on its own through ``relative_data`` and
       stored in ``global_dict`` as ``{'data': ..., 'factor': 0.0}``.
       Queries whose data is ``None`` are excluded from the later stages.
    2. The remaining queries are requested in groups (group size taken from
       the module-level ``GROUPSIZE``) together with a shared reference
       query. Each query's ``factor`` becomes ``max(query) / max(reference)``
       within its group; group results live in ``global_dict2``, whose
       ``'REF'`` entry holds the reference query.
    3. ``pairwise_queryset`` refines the factors and produces
       ``global_dict3``.

    ``global_dict`` is persisted under ``fname`` and ``global_dict2`` under
    ``fname + '_5comp'`` via ``sl.save_obj``; both are reloaded when the
    matching ``obj/<name>.pkl`` file already exists.

    Args:
        all_queries: A list of query strings, or one string with the queries
            separated by ``', '``.
        fname: Base name for the persisted dictionaries.
        trange: Forwarded to ``relative_data`` and ``pairwise_queryset``.
        qassoc: Not used by this function; kept for backward compatibility.
        loc: Forwarded as ``loc`` to the data requests.
        verbose: When true, print the parsed list of queries.

    Returns:
        The tuple ``(global_dict, global_dict2, global_dict3)``.

    Note:
        An unsupported ``all_queries`` type prints a message and calls
        ``exit()``.
    """
    cmpext = '_5comp'
    lq_val = 1e-7   # stand-in factor for queries with ~0 relative volume
    n_low = 1       # stop sweeping once at most this many low-factor queries remain
    max_loops = 5   # upper bound on group-comparison sweeps

    print('Beginning relative queryset compilation...')

    pytrend = None  # TrendReq client, created only when a request is needed

    if not os.path.isdir('obj'):
        os.makedirs('obj')
    if os.path.exists('obj/' + fname + '.pkl'):
        print('Loading previous file...')
        global_dict = sl.load_obj(fname)
    else:
        global_dict = {}
        sl.save_obj(global_dict, fname)

    if isinstance(all_queries, str):
        query_elements = all_queries.split(', ')
    elif isinstance(all_queries, list):
        query_elements = all_queries
    else:
        print("Data type for query input must be 'list' or 'string'")
        exit()

    if verbose:
        print('Query elements: ', query_elements)

    # ---- Stage 1: request every query individually -----------------------
    # global_dict is the main dictionary: each search term is stored on its
    # own here, together with its relative factor.
    for query in query_elements:
        if query in global_dict:
            print('"%s" already stored in dictionary.' % query)
            continue
        if pytrend is None:
            pytrend = TrendReq(hl='en-UK', tz=0)
        print('Requesting data for: ', query)
        global_dict[query] = {
            'data': relative_data([query], pytrend, trange=trange, loc=loc),
            'factor': .0,
        }
        # Saved after each request so an interrupted run can resume.
        sl.save_obj(global_dict, fname)

    # Keep only queries that actually have data. `is not None` is used
    # because `== None` on an array-like result is not a plain boolean.
    query_set = [q for q in global_dict if global_dict[q]['data'] is not None]

    # ---- Stage 2: grouped comparison against a reference query -----------
    # global_dict2 holds the results of comparing several terms at once; a
    # shared reference term makes the groups comparable with each other.
    iqueries = range(len(query_set))
    if os.path.exists('obj/' + fname + cmpext + '.pkl'):
        print('Loading previous for 5 comparison file...')
        global_dict2 = sl.load_obj(fname + cmpext)
        iqueries = find_lowfactors(global_dict, query_set, thresh=lq_val)
    else:
        global_dict2 = {}

    if 'REF' not in global_dict2:
        global_dict2['REF'] = ''
        sl.save_obj(global_dict2, fname + cmpext)

    ref_query = global_dict2['REF']

    # At present this is rough: terms with '0' searches relative to the other
    # terms are simply set to a small number (lq_val) for the subsequent
    # pairwise comparison.
    icheck = 0
    while len(iqueries) > n_low and icheck < max_loops:
        print('Five query check, loop number %d/%d' % (icheck + 1, max_loops))
        query_temp = []
        last_pos = len(iqueries) - 1
        for pos, iquery in enumerate(iqueries):
            query_temp.append(query_set[iquery])
            # Without a reference query the first group uses the full group
            # size; once a reference exists one slot is reserved for it.
            groupsize = GROUPSIZE if ref_query == '' else GROUPSIZE - 1
            # Flush on a full group or on the last *selected* query. The
            # original compared against len(query_set)-1, which silently
            # dropped the trailing partial group whenever `iqueries` was a
            # subset of the query set.
            if len(query_temp) % groupsize != 0 and pos != last_pos:
                continue

            # Never list the reference twice in one request.
            if ref_query != '' and ref_query not in query_temp:
                query_temp.append(ref_query)
            qstring = ", ".join(query_temp)

            dirty = False
            if qstring not in global_dict2:
                if pytrend is None:
                    pytrend = TrendReq(hl='en-UK', tz=0)
                print('Requesting data for: ', qstring)
                global_dict2[qstring] = {
                    'data': relative_data(query_temp, pytrend, trange=trange, loc=loc),
                    'factor': 1.0,
                }
                dirty = True

            if ref_query == '':
                ref_query = get_reference(global_dict2[qstring]['data'])
                global_dict2['REF'] = ref_query
                dirty = True

            # Persist whenever something new was stored. The original tested
            # `qstring not in global_dict2` *after* inserting it, so this
            # save could never run.
            if dirty:
                sl.save_obj(global_dict2, fname + cmpext)

            group_data = global_dict2[qstring]['data']
            ref_max = max(group_data[ref_query])
            for quer in query_temp:
                if ref_max > lq_val:
                    global_dict[quer]['factor'] = (
                        float(max(group_data[quer])) / float(ref_max))
                else:
                    global_dict[quer]['factor'] = lq_val
                # Marks the factor as coming from the grouped comparison only.
                global_dict[quer]['f_approx'] = True

            query_temp = []

        iqueries = find_lowfactors(global_dict, query_set, thresh=lq_val)
        icheck += 1

    # Report when the sweep budget ran out. In the original this check sat
    # inside the loop where it was unreachable (and its format string would
    # have raised TypeError), so execution always continued; that control
    # flow is preserved and the condition is reported as a warning.
    if len(iqueries) > n_low and icheck >= max_loops:
        print('Warning: Number of sweeps for the 5 query comparison exceeded')
        print('Number of queries below limit: %d/%d' % (len(iqueries), len(global_dict)))

    sl.save_obj(global_dict, fname)

    # ---- Stage 3: pairwise comparison -------------------------------------
    # Known limitation: low-volume terms (factor == lq_val) cannot be ordered
    # reliably before the pairwise comparison, so scaling at the low
    # popularity end may be off.
    global_dict, global_dict3 = pairwise_queryset(
        global_dict, query_set, trange, fname, ptrend=pytrend, loc=loc)

    return global_dict, global_dict2, global_dict3
Exemplo n.º 3
0
# Horizontal rule printed between report sections.
_RULE = (
    "-----------------------------------------------------------------------------------------------------------"
)


def _sum_finite(distances):
    """Return the sum of the entries of ``distances`` that are not ``inf``."""
    return sum(distances[node] for node in distances
               if distances[node] != float('inf'))


def _print_stations_within(builds, stations, limit, label, distance):
    """Print, per building, the stations with ``distance(b, st) <= limit``.

    ``label`` is the parenthesised direction hint embedded in the message.
    """
    not_far_stations = {
        b: [st for st in stations if distance(b, st) <= limit] for b in builds
    }
    for b in builds:
        if len(not_far_stations[b]) > 0:
            print(
                f"Для дома {b} следующие станции находятся ближе чем в {limit} условных км досигаемости({label}): {not_far_stations[b]}"
            )
        else:
            print(
                f"Для дома {b} никакие станции не находятся ближе чем в {limit} условных км досигаемости({label})"
            )


def _study_2_3_5_clusters(G, params):
    """Run the K = 2, 3, 5 cluster study and persist every intermediate result.

    ``params[0]`` is the number of buildings and ``params[1]`` is passed as
    the first argument of ``GetBuildingsObjects``; one station is requested.
    """
    n = params[0]
    # Pick random n buildings and 1 firestation
    builds, station = GetBuildingsObjects(params[1], n, 1)
    print(len(builds))
    print(station)
    print('-' * 25)

    for k in (2, 3, 5):
        print(k)
        if k == 2:
            # NOTE(review): K == 2 is read from a previously saved "res_2"
            # instead of being recomputed -- confirm this is intended.
            clu = sl.load_obj("res_2")
        else:
            clu = clusters.Get_k_Clusters(builds, G, k)
            sl.save_obj(clu, "res_" + str(k))

        # Centroid search
        print("начался поиск центроид")
        center = clusters.Find_Centers(clu, G)
        sl.save_obj(center, "center" + str(k))
        print("Центроиды: ", center)

        # Shortest-path tree from the station to the centroids
        fs_to_cen = graph.DijkstraWithFinishNodes(G, station[0], center)
        fs_to_cen_tree = graph.GetTree(center, fs_to_cen[1])
        sl.save_obj(fs_to_cen, "fs_to_cen_" + str(k))
        sl.save_obj(fs_to_cen_tree, "fs_to_cen_tree_" + str(k))

        # Sum of shortest distances (unreachable entries are skipped)
        that_sum = _sum_finite(fs_to_cen[0])
        print(f"Сумма кратчайших расстояний равна {that_sum}")

        # Shortest paths inside each cluster, from its centroid
        for pos, c in enumerate(clu):
            ccen = center[pos]  # centroid of this cluster

            cdij = graph.DijkstraWithFinishNodes(G, ccen, c)
            sl.save_obj(cdij, "cdij" + str(k) + str(pos))
            ctree = graph.GetTree(c, cdij[1])
            sl.save_obj(ctree, "ctree" + str(k) + str(pos))

            # Tree length
            ctreelen = graph.GetSumOfTree(ctree, G)
            print(f"Для {pos}-го кластера длина дерева равна {ctreelen}")


def main(args):
    """Run the analyses selected by the attributes of ``args``.

    The graph is loaded from the saved object ``"adj_list_47"``. Each of the
    following attributes enables one section when set:

    * ``show_fs`` -- list the saved fire stations.
    * ``info`` -- print ``GetInfo(args.info)`` key by key.
    * ``pick_fs_and_bds`` -- ``[m, n, x]``: pick ``m`` stations and ``n``
      buildings via ``GetBuildingsObjects(x, n, m)``, compute (or, when
      ``no_recalc_values`` is set, load) Dijkstra results in both
      directions, then run the sub-reports ``both_sides``,
      ``sum_of_paths``, ``sum_of_tree``, ``mindist_fs`` and
      ``set_distance``.
    * ``data_for_clusters`` -- ``[n, x]``: tree and cluster statistics for
      K = 2.
    * ``dendrogram`` -- build a dendrogram and save it to ``foo.pdf``.
    * ``get_2_3_5_clusters`` / ``get_2_3_5_clusters_load`` -- cluster study
      for K = 2, 3, 5 with all intermediate results saved.
    """
    G = sl.load_obj("adj_list_47")

    # Show fire stations
    if args.show_fs:
        for station in sl.load_obj("firestations"):
            print(station['id'], station['name'])

    # Get Info By ID
    if args.info is not None:
        try:
            res = GetInfo(args.info)
            for i in res:
                print(i, res[i])
        # NOTE(review): presumably GetInfo raises UnboundLocalError for an
        # unknown id -- confirm against its implementation.
        except UnboundLocalError:
            print("Wrong id. Check it and try again.")
        # `Exception` rather than a bare except, so Ctrl-C / SystemExit are
        # not swallowed.
        except Exception:
            print("Oops, something went wrong")

    # First half of task.
    if args.pick_fs_and_bds is not None:
        m = args.pick_fs_and_bds[0]
        n = args.pick_fs_and_bds[1]
        # Pick random m firestations and n buildings
        builds, stations = GetBuildingsObjects(args.pick_fs_and_bds[2], n, m)

        # To analyse the distances we run DijkstraWithFinishNodes from every
        # building to the stations and from every station to the buildings.
        res_builds = {}
        res_stations = {}
        mindist_from_builds = {}
        mindist_from_stations = {}

        if args.no_recalc_values is None:
            for b in builds:
                res_builds[b] = graph.DijkstraWithFinishNodes(G, b, stations)
                dist, minfb = graph.GetMinDist(res_builds[b][0], stations)
                mindist_from_builds[b] = (dist, minfb)

                print("Для дома c id = ", b, " ближайшая пожарная станция - ",
                      GetInfo(minfb[0])['name'], " id = ", minfb)
                print(_RULE)

            print("\n")
            for st in stations:
                res_stations[st] = graph.DijkstraWithFinishNodes(G, st, builds)
                dist, minfs = graph.GetMinDist(res_stations[st][0], builds)
                mindist_from_stations[st] = (dist, minfs)

                print("Для пожарной станции ",
                      GetInfo(st)["name"], " id = ", st,
                      " ближайший дом c id = ", minfs)
                print(_RULE)

            sl.save_obj(res_builds, "res_builds")
            sl.save_obj(res_stations, "res_stations")
            sl.save_obj(mindist_from_stations, "mindist_from_stations")
            sl.save_obj(mindist_from_builds, "mindist_from_builds")
        else:
            # Reuse the results saved by a previous run.
            res_builds = sl.load_obj("res_builds")
            res_stations = sl.load_obj("res_stations")
            mindist_from_builds = sl.load_obj("mindist_from_builds")
            mindist_from_stations = sl.load_obj("mindist_from_stations")

        # Station with the minimal round-trip (there + back) distance per house
        if args.both_sides is not None:
            for b in builds:
                min_dist_sum = float('inf')
                closest_station = None
                for st in stations:
                    round_trip = res_stations[st][0][b] + res_builds[b][0][st]
                    if min_dist_sum > round_trip:
                        min_dist_sum = round_trip
                        closest_station = st

                print(
                    f"Для дома с id = {b} ближайшая(по сумме туда-обратно) пожарная станция {GetInfo(closest_station)['name']}"
                )

        # Station whose sum of shortest distances to all houses is minimal
        if args.sum_of_paths is not None:
            min_of_sum = float('inf')
            minel = None
            for st in stations:
                # Sum over the houses only; the original summed every entry
                # of the distance map.
                currsum = sum(res_stations[st][0][b] for b in builds)
                # The original tested `min_of_sum < currsum`, which is never
                # true when starting from +inf, so no minimum was ever chosen.
                if minel is None or currsum < min_of_sum:
                    min_of_sum = currsum
                    minel = st

            # Report the minimising station (the original printed the last
            # loop variable `st` instead).
            if minel is not None:
                print(
                    f"Для объекта {GetInfo(minel)['name']}, id = {minel} сумма кратчайших расстояний от него до всех домов минимальна"
                )

        # Station whose shortest-path tree has the minimal weight
        if args.sum_of_tree is not None:
            min_of_tree = float('inf')
            mintree = None
            for st in stations:
                tree = graph.GetTree(builds, res_stations[st][1])
                sumoftree = graph.GetSumOfTree(tree, G)
                # Same inverted-comparison fix as for the sum of paths.
                if mintree is None or sumoftree < min_of_tree:
                    min_of_tree = sumoftree
                    mintree = st

            if mintree is not None:
                print(
                    f"Дерево кратчайших путей имеет минимальный вес для {GetInfo(mintree)['name']}, id = {mintree}"
                )

        # Station for which the distance to its farthest house is minimal
        if args.mindist_fs is not None:
            mindist_global = float('inf')
            resfs = None
            for st in stations:
                # Farthest reachable entry for this station
                maxdist = 0
                for s in res_stations[st][0]:
                    d = res_stations[st][0][s]
                    if maxdist < d and d != float('inf'):
                        maxdist = d
                if maxdist < mindist_global:
                    mindist_global = maxdist
                    resfs = st
            print(
                f"Объект {GetInfo(resfs)['name']} с id = {resfs} расположен так, что расстояние между ним и самым дальним домом минимально"
            )

        # For every house, list the stations no farther than X away
        if args.set_distance is not None:
            X = args.set_distance

            # House -> station
            _print_stations_within(
                builds, stations, X, "при движении от дома к станции",
                lambda b, st: res_builds[b][0][st])
            print(_RULE)

            # Station -> house
            _print_stations_within(
                builds, stations, X, "при движении от станции к дому",
                lambda b, st: res_stations[st][0][b])
            print(_RULE)

            # Round trip
            _print_stations_within(
                builds, stations, X, "при необходимости возвращения",
                lambda b, st: res_stations[st][0][b] + res_builds[b][0][st])

    # Second part of the research work: clusters
    if args.data_for_clusters is not None:
        n = args.data_for_clusters[0]
        # Pick random n buildings and 1 firestation
        builds, station = GetBuildingsObjects(args.data_for_clusters[1], n, 1)

        # Build the shortest-path tree from the station to the buildings
        station = station[0]
        dij = graph.DijkstraWithFinishNodes(G, station, builds)
        tree = graph.GetTree(builds, dij[1])

        # Tree length (computed but not reported, as in the original)
        treelen = graph.GetSumOfTree(tree, G)

        # Sum of shortest distances
        that_sum = _sum_finite(dij[0])
        print(f"Сумма кратчайших расстояний равна {that_sum}")

        # Study for K = 2 (3 and 4 are currently disabled)
        for k in [2]:
            clu = clusters.Get_k_Clusters(builds, G, k)

            # Centroid search
            print("начался поиск центроид")
            center = clusters.Find_Centers(clu, G)
            print("Центроиды: ", center)

            # Shortest paths inside each cluster
            for pos, c in enumerate(clu):
                ccen = center[pos]  # centroid of this cluster

                cdij = graph.DijkstraWithFinishNodes(G, ccen, c)
                ctree = graph.GetTree(c, cdij[1])

                # Tree length
                ctreelen = graph.GetSumOfTree(ctree, G)
                print(f"Для {pos}-го кластера длина дерева равна {ctreelen}")

                # Sum of shortest distances
                csum = _sum_finite(cdij[0])
                print(
                    f"Сумма кратчайших расстояний для {pos}-го кластера равна {csum}"
                )

    # Dendrogram
    if args.dendrogram is not None:
        n = args.dendrogram[0]
        # Pick random n buildings and 1 firestation
        # NOTE(review): the other sections pass element [1] as the first
        # argument here; element [0] is used for both values -- confirm.
        builds, station = GetBuildingsObjects(args.dendrogram[0], n, 1)

        plt.figure()
        hierarchy.dendrogram(clusters.Get_Dendro_matr(builds, G))
        plt.savefig('foo.pdf')

    if args.get_2_3_5_clusters is not None:
        _study_2_3_5_clusters(G, args.get_2_3_5_clusters)

    if args.get_2_3_5_clusters_load is not None:
        # The original always read args.get_2_3_5_clusters here and crashed
        # when only the *_load flag was given. Keep using that value when it
        # is present (unchanged behaviour); otherwise use the flag's own.
        params = args.get_2_3_5_clusters
        if params is None:
            params = args.get_2_3_5_clusters_load
        _study_2_3_5_clusters(G, params)
Exemplo n.º 4
0
def draw_city_graph_rebrand(name):
    """Draw the adjacency-list graph saved under ``name`` and save the figure.

    Node coordinates come from the saved object ``"coords"``, indexed by node
    id, with the two coordinate components convertible to ``float``. ``G``
    maps a node id to an iterable of edges whose first element is the
    neighbour's node id. Every edge is drawn as a black line and every node
    that takes part in at least one edge as a blue marker.

    The figure is written to ``classic_Voronezh.png`` and also saved through
    ``saveload.save_obj`` as ``'fig_demo'``.

    Args:
        name: Name of the saved adjacency list to load.
    """
    node_coords = saveload.load_obj("coords")
    G = saveload.load_obj(name)

    plt.ioff()

    fig = plt.gcf()
    fig.set_size_inches(50, 50, forward=True)

    total = len(G)

    # Diagnostic: report nodes with more than four outgoing edges.
    for i in G:
        if len(G[i]) > 4:
            print("len(", i, ") > 4 and equal ", len(G[i]))

    # Nodes that appear in some edge, id -> (x, y). They are collected here
    # and drawn with ONE scatter call below; the original issued two scatter
    # calls per edge, re-drawing the same markers over and over.
    marked = {}

    for pos, i in enumerate(G):
        i_coords = (float(node_coords[i][0]), float(node_coords[i][1]))

        print("step ", pos, "from", total)
        print(len(G[i]))
        for j in G[i]:
            j_coords = (float(node_coords[j[0]][0]), float(node_coords[j[0]][1]))

            marked[i] = i_coords
            marked[j[0]] = j_coords
            plt.plot((i_coords[0], j_coords[0]), (i_coords[1], j_coords[1]), '-k')

    if marked:
        xs, ys = zip(*marked.values())
        plt.scatter(xs, ys, linewidths=10, c="blue")

    fig.savefig('classic_Voronezh.png', dpi=100)
    saveload.save_obj(fig, 'fig_demo')
Exemplo n.º 5
0
def pairwise_queryset(global_dict, query_set, trange, fname, ptrend=None, loc='GB', tol=1e-1):
    """Refine the query factors by comparing neighbouring queries pairwise.

    Queries are ordered by their current ``factor`` (descending). Each sweep
    walks the ordered list, requests data for every neighbouring pair
    (cached in ``global_dict3`` and persisted under ``fname + '_2comp'``),
    and swaps the pair when the second query's summed data exceeds the first
    one's by more than 10%. Sweeps repeat until a sweep leaves the order
    unchanged or ``len(global_dict)`` sweeps have been made.

    Afterwards each query's ``factor`` is the running product of the pair
    ratios along the ordered list (the first query gets 1.0) and its
    ``f_approx`` flag is set to ``False``. ``global_dict`` is saved under
    ``fname``.

    Args:
        global_dict: Mapping query -> dict with at least a ``'factor'`` entry;
            updated in place.
        query_set: Queries to compare; each must be a key of ``global_dict``.
        trange: Forwarded to ``relative_data``.
        fname: Base name for the persisted dictionaries.
        ptrend: Optional request client; a ``TrendReq`` is created when a
            request is needed and none was given.
        loc: Forwarded as ``loc`` to ``relative_data``.
        tol: Not used by this function; kept for backward compatibility.

    Returns:
        The tuple ``(global_dict, global_dict3)``.

    Note:
        A pair whose ratio of sums is above 100 or below 0.01 prints
        diagnostics and calls ``exit()``.
    """
    max_loop = len(global_dict)
    cmp2ext = '_2comp'

    print('Data comparison before pairwise sweep:')
    print('Query    Factor    Approx?')

    allfacts = [global_dict[quer]['factor'] for quer in query_set]
    allquers = sort_queries(list(query_set), allfacts)

    if os.path.exists('obj/' + fname + cmp2ext + '.pkl'):
        print('Loading previous for 2 comparison file...')
        global_dict3 = sl.load_obj(fname + cmp2ext)
    else:
        global_dict3 = {}

    # ratios[q] starts as q's current factor and is overwritten during the
    # sweeps with q's ratio relative to its predecessor in the ordering.
    ratios = {q: global_dict[q]['factor'] for q in allquers}
    # Descending by factor; sorted() is stable, so ties keep allquers order.
    srt_pw_list = sorted(ratios, key=ratios.get, reverse=True)

    not_sorted = True
    ipair = 0
    while not_sorted and ipair < max_loop:
        print('Pairwise sweep %d/%d' % (ipair + 1, max_loop))

        previous_order = list(srt_pw_list)

        # The head of the list is the reference with ratio 1. Guarded so an
        # empty query set no longer raises IndexError.
        if srt_pw_list:
            ratios[srt_pw_list[0]] = 1.

        # Loop through neighbouring pairs, sorted into descending popularity
        for iiq in range(len(srt_pw_list) - 1):
            quer = srt_pw_list[iiq]
            next_quer = srt_pw_list[iiq + 1]
            qstring = str(quer) + ", " + str(next_quer)

            if qstring not in global_dict3:
                # Create the client only when a request is really needed.
                if ptrend is None:
                    ptrend = TrendReq(hl='en-UK', tz=0)
                print('Requesting data for: ', qstring)
                global_dict3[qstring] = {
                    'data': relative_data([quer, next_quer], ptrend, trange=trange, loc=loc),
                    'factor': 1.0,
                }
                sl.save_obj(global_dict3, fname + cmp2ext)

            pair_data = global_dict3[qstring]['data']
            q1sum = float(sum(pair_data[quer]))
            q2sum = float(sum(pair_data[next_quer]))

            # The small epsilon keeps the sanity check itself from dividing
            # by zero; any q1sum == 0 case fails the check and exits, so the
            # plain divisions below are safe.
            drop = q2sum / (q1sum + 1e-10)
            if drop > 100. or drop < 0.01:
                print('Error: pairwise comparison yielded a drop fraction greater than 100')
                print('Comparison terms: {0} vs. {1}'.format(quer, next_quer))
                print('\n**********\nData 1:', pair_data[quer])
                print('\n**********\nData 2:', pair_data[next_quer])
                exit()

            # If the next query is much more popular than the previous then swap.
            if q2sum > 1.1 * q1sum:
                print('Swapping: %s with %s (ratio: %.2lf)' % (quer, next_quer, q2sum / q1sum))
                srt_pw_list[iiq], srt_pw_list[iiq + 1] = srt_pw_list[iiq + 1], srt_pw_list[iiq]
                q1sum, q2sum = q2sum, q1sum
                # The promoted query's ratio to ITS predecessor is unknown
                # until the next sweep; 1.0 is a placeholder.
                ratios[srt_pw_list[iiq]] = 1.

                # Debugging aid kept from the original: after very many
                # sweeps, show the two series that keep swapping.
                if ipair > 120:
                    plt.plot(pair_data['Date'], pair_data[quer])
                    plt.plot(pair_data['Date'], pair_data[next_quer])
                    plt.show()

            ratios[srt_pw_list[iiq + 1]] = q2sum / q1sum

        not_sorted = previous_order != srt_pw_list
        ipair += 1

        # Warn only when the order is STILL changing at the sweep limit; the
        # original also warned when the final allowed sweep had converged.
        if not_sorted and ipair >= max_loop:
            print('Warning: reached maximum number of sweeps for pairwise comparison without success')
            print('Using current data, but discrepancies between comparisons exist.')

    print('Pairwise sweep complete..')

    # Chain the neighbour ratios into factors relative to the top query.
    factor = 1.0
    for item in srt_pw_list:
        factor *= ratios[item]
        global_dict[item]['factor'] = factor
        global_dict[item]['f_approx'] = False
        print(item, global_dict[item]['factor'])

    sl.save_obj(global_dict, fname)

    return global_dict, global_dict3
Exemplo n.º 6
0
def calc_basics_demo(recalc=False):
    """Rebuild the demo adjacency list, optionally refreshing its inputs first.

    When ``recalc`` is true, buildings, fire stations, roads and node
    coordinates are re-read through ``xmlparser``, a weight table is built
    (1.0 per building id, ``random.random() + 1`` per fire station id) and
    all five objects are saved with ``sl.save_obj``.

    In every case the graph returned by ``graph.GetGraphListWithRead()`` is
    saved as ``"adj_list_demo"``.
    """
    if recalc:
        buildings = xmlparser.getBuildings()
        firestations = xmlparser.getFireStations()
        roads = xmlparser.getRoads()
        coords = xmlparser.getNodesCoords()

        # Buildings first, then stations, so a station id that collides with
        # a building id ends up with the station's weight.
        weights = {entry['id']: 1. for entry in buildings}
        weights.update(
            (entry['id'], random.random() + 1) for entry in firestations)

        to_store = (
            (firestations, "firestations"),
            (buildings, "buildings"),
            (roads, "roads"),
            (coords, "coords"),
            (weights, "weights"),
        )
        for payload, label in to_store:
            sl.save_obj(payload, label)

        print("demo files recalculated")

    sl.save_obj(graph.GetGraphListWithRead(), "adj_list_demo")
Exemplo n.º 7
0
def pairwise_queryset(global_dict,
                      query_set,
                      trange,
                      fname,
                      ptrend=None,
                      loc='GB',
                      tol=1e-1):
    """Iteratively re-rank queries by chaining two-term comparison ratios.

    Queries start ordered by ``global_dict[q]['factor']`` (descending).  On
    each sweep every adjacent pair in the current ranking is compared using
    two-term data from ``relative_data``; the ratio of the summed series
    (second / first) is chained from the top-ranked query, which is pinned at
    1.0.  The queries are then re-sorted by the chained values and the sweep
    repeats until the ratio between neighbours changes by no more than
    ``tol`` (relative) from one sweep to the next, or 75 sweeps have run.

    Pair data is cached in a dict keyed by ``"<first>, <second>"`` and saved
    through ``sl.save_obj`` under ``fname + '_2comp'`` after every new
    request.

    Args:
        global_dict: mapping query -> dict holding at least ``'factor'``.
        query_set: sized iterable of query terms present in ``global_dict``.
        trange: forwarded to ``relative_data`` as ``trange``.
        fname: base name for saved objects.
        ptrend: client passed to ``relative_data``; when ``None`` a
            ``TrendReq(hl='en-UK', tz=0)`` is created on first use.
        loc: forwarded to ``relative_data`` as ``loc``.
        tol: relative tolerance on the sweep-to-sweep change of the ratio
            between neighbouring queries.

    Returns:
        As written, nothing: ``exit()`` is called once the sweeps finish, so
        the trailing write-back and ``return`` are never reached (see the
        NOTE(review) near the end of the body).
    """
    MaxLoop = 75
    ref_fact = 1.0
    # Only read by the write-back section that follows the sweep loop.
    allfacts_pw = np.ones(len(query_set)) * ref_fact
    ipair = 0
    notSorted = True
    cmp2ext = '_2comp'

    allquers = []
    allfacts = []

    print('Data comparison before pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    # presumably orders the queries by their factor -- confirm against
    # sort_queries, which is defined outside this block.
    allquers = sort_queries(allquers, allfacts)

    # Cache of two-term comparison results, reloaded from disk if present.
    global_dict3 = {}
    if os.path.exists('obj/' + fname + cmp2ext + '.pkl'):
        print('Loading previous for 2 comparison file...')
        global_dict3 = sl.load_obj(fname + cmp2ext)

    tmp_dict = {}
    order = {}  # query -> rank position, shown in the request log line
    for iq, q in enumerate(allquers):
        tmp_dict[q] = global_dict[q]['factor']
        order[q] = iq

    srt_pw_dict = OrderedDict(
        sorted(tmp_dict.items(), key=operator.itemgetter(1), reverse=True))

    prev_order = None
    while notSorted and ipair < MaxLoop:

        print('Pairwise sweep %d/%d' % (ipair + 1, MaxLoop))

        # Neighbours are read from a snapshot of the keys; OrderedDict has
        # no public "next key" accessor and its internals are not portable.
        ranked = list(srt_pw_dict)

        srt_pw_dict_new = {}
        if ranked:
            srt_pw_dict_new[ranked[0]] = 1.

        #Loop through adjacent queries, sorted into descending popularity
        for quer, next_quer in zip(ranked, ranked[1:]):
            if ptrend is None:
                ptrend = TrendReq(hl='en-UK', tz=0)

            qstring = str(quer) + ", " + str(next_quer)
            qtmp = [quer, next_quer]

            if qstring not in global_dict3:
                print('Requesting data for: ', qstring, order[quer],
                      order[next_quer])

                global_dict3[qstring] = {
                    'data': relative_data(qtmp,
                                          ptrend,
                                          trange=trange,
                                          loc=loc),
                    'factor': 1.0
                }

                sl.save_obj(global_dict3, fname + cmp2ext)

            num = float(sum(global_dict3[qstring]['data'][qtmp[1]]))
            denom = float(sum(global_dict3[qstring]['data'][qtmp[0]]))

            # NOTE(review): a zero numerator maps to 100.0 and a zero
            # denominator to 0.01, the opposite of the limits of num/denom.
            # Left unchanged -- confirm whether this is intended.
            if num == 0.0:
                print(
                    'Warning: 0 reached for second value in drop factor...'
                )
                frac_drop = 100.0
            elif denom == 0.0:
                print(
                    'Warning: 0 reached for initial value in drop factor...'
                )
                frac_drop = 0.01
            else:
                frac_drop = num / denom

            srt_pw_dict_new[next_quer] = srt_pw_dict_new[quer] * frac_drop

            # Ratios outside [0.01, 100] are treated as fatal.
            if frac_drop > 100. or frac_drop < 0.01:
                print(
                    'Error: pairwise comparison yielded a drop fraction greater than 100'
                )
                print('Comparison terms: {0} vs. {1}'.format(
                    qtmp[0], qtmp[1]))
                print('\n**********\nData 1:',
                      global_dict3[qstring]['data'][qtmp[0]])
                print('\n**********\nData 2:',
                      global_dict3[qstring]['data'][qtmp[1]])
                exit()

        srt_pw_dict_prev = copy.copy(srt_pw_dict)
        srt_pw_dict = OrderedDict(
            sorted(srt_pw_dict_new.items(),
                   key=operator.itemgetter(1),
                   reverse=True))

        order = {}

        notSorted = False

        num_out = 0
        big_err = 0.
        sum_err = 0.
        errs = []
        ranked = list(srt_pw_dict)
        for iiq, key in enumerate(ranked):
            # Ratio to the next-ranked query under the new and the previous
            # values; the last query has no neighbour and uses 1.0 for both.
            if iiq + 1 < len(ranked):
                next_quer = ranked[iiq + 1]
                rat1 = srt_pw_dict[key] / srt_pw_dict[next_quer]
                rat2 = srt_pw_dict_prev[key] / srt_pw_dict_prev[next_quer]
            else:
                rat1 = 1.
                rat2 = 1.

            print('%d. %s :      %.2e    %.2e |   %.2e    %.2e ' %
                  (iiq, key, srt_pw_dict[key], srt_pw_dict_prev[key], rat1,
                   rat2))
            if prev_order is not None:
                print(prev_order[key])

            err = abs((rat1 - rat2) / rat1)
            sum_err += err
            errs.append(err)
            if err > tol:
                if err > big_err:
                    big_err = err
                notSorted = True
                num_out += 1

            order[key] = iiq

        print('Number out:', num_out)
        print('Biggest error:', big_err)
        print('Mean error:', sum_err / float(len(srt_pw_dict)))

        print('Median error:', np.median(np.array(errs)))

        prev_order = copy.copy(order)

        ipair += 1

        # Warn only if the sweep budget ran out while still unconverged.
        if notSorted and ipair >= MaxLoop:
            print(
                'Warning: reached maximum number of sweeps for pairwise comparison without success'
            )
            print(
                'Using current data, but discrepancies between comparisons exist.'
            )

    print('Pairwise sweep complete..')
    # NOTE(review): assuming `exit` is the interpreter's built-in, this raises
    # SystemExit, so nothing below runs.  Also note allfacts_pw is never
    # updated by the sweeps above, so the write-back would store 1.0 for
    # every query if this exit() were simply removed.
    exit()

    for iq in range(len(allquers)):
        global_dict[allquers[iq]]['factor'] = allfacts_pw[iq]
        global_dict[allquers[iq]]['f_approx'] = False

    print('Data comparison after pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    sl.save_obj(global_dict, fname)

    return global_dict, global_dict3
Exemplo n.º 8
0
        bits_y_arr = np.flip(bits_y_arr,axis=1)

        def flip_bits(x):
            if x > 0.5:
                return 0
            else:
                return 1

        vec_flip = np.vectorize(flip_bits)
        bits_y_arr_flip = vec_flip(bits_y_arr)
        bits_y_arr = np.stack((bits_y_arr_flip, bits_y_arr), axis=2)
        return bits_x_arr, bits_y_arr


if __name__ == '__main__':
    # Script entry point: draw two datasets with the same arguments and
    # store them as the train and test splits.
    train_X, train_y = BitsDataset.get_dataset(256, 4)
    print(train_X.shape, train_y.shape)
    for array, path in ((train_X, '../data/bits/train_X'),
                        (train_y, '../data/bits/train_y')):
        save_obj(array, path)

    test_X, test_y = BitsDataset.get_dataset(256, 4)
    for array, path in ((test_X, '../data/bits/test_X'),
                        (test_y, '../data/bits/test_y')):
        save_obj(array, path)

    # Show sample i: its input, and the last slice along the final axis of
    # its target.
    i = 0
    sample = slice(i, i + 1)
    print(test_X[sample])
    print(test_y[sample, :, -1:])


Exemplo n.º 9
0
def pairwise_queryset(global_dict,
                      query_set,
                      trange,
                      fname,
                      ptrend=None,
                      loc='GB',
                      tol=1e-2):
    """Refine per-query factors by sweeping two-term comparisons.

    Each query starts with a factor of 1.0.  On every sweep, neighbouring
    queries in the current ordering are compared using two-term data from
    ``relative_data``; the second query's factor becomes the first query's
    factor times the ratio of the two series maxima (second / first).  The
    ordering is then recomputed with ``sort_queries`` and the sweep repeats
    until no factor changes by more than ``tol`` (relative) or 4 sweeps have
    run.

    Pair data is cached in a dict keyed by ``"<first>, <second>"`` and saved
    through ``sl.save_obj`` under ``fname + '_2comp'`` after every new
    request.

    Args:
        global_dict: mapping query -> dict with ``'factor'`` and
            ``'f_approx'`` entries.
        query_set: sized iterable of query terms present in ``global_dict``.
        trange: forwarded to ``relative_data`` as ``trange``.
        fname: base name for saved objects.
        ptrend: client passed to ``relative_data``; when ``None`` a
            ``TrendReq(hl='en-UK', tz=0)`` is created on first use.
        loc: forwarded to ``relative_data`` as ``loc``.
        tol: relative tolerance on the sweep-to-sweep change of a factor.

    Returns:
        As written, nothing: ``exit()`` is called once the sweeps finish, so
        the trailing write-back and ``return`` are never reached (see the
        NOTE(review) near the end of the body).
    """
    MaxLoop = 4
    ref_fact = 1.0
    allfacts_pw = np.ones(len(query_set)) * ref_fact
    ipair = 0
    notSorted = True
    cmp2ext = '_2comp'

    allquers = []
    allfacts = []

    print('Data comparison before pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    # presumably orders the queries by their factor -- confirm against
    # sort_queries, which is defined outside this block.
    allquers = sort_queries(allquers, allfacts)

    # Cache of two-term comparison results, reloaded from disk if present.
    global_dict3 = {}
    if os.path.exists('obj/' + fname + cmp2ext + '.pkl'):
        print('Loading previous for 2 comparison file...')
        global_dict3 = sl.load_obj(fname + cmp2ext)

    iqueries = np.arange(len(allquers))
    iqsrt = iqueries
    iqsrt_prev = iqueries

    while notSorted and ipair < MaxLoop:

        print('Pairwise sweep %d/%d' % (ipair + 1, MaxLoop))
        # Snapshot taken before the in-place updates below, for the
        # convergence test at the end of the sweep.
        allfacts_pw_prev = copy.copy(allfacts_pw)

        #Loop through query indices, sorted into descending popularity
        for iiq in iqueries:

            #If last search, use previous as the reference value
            if iiq < len(allquers) - 1:
                pqm, pqp = iiq, iiq + 1
            else:
                pqm, pqp = iiq - 1, iiq

            iq1 = iqsrt[pqm]
            iq2 = iqsrt[pqp]

            qstring = str(allquers[iq1]) + ", " + str(allquers[iq2])
            qtmp = [allquers[iq1], allquers[iq2]]

            print('Iq1:', iq1, 'Iq2:', iq2)
            print(
                np.where(iqsrt_prev == iq1)[0],
                np.where(iqsrt_prev == iq2)[0])

            print(allfacts_pw[iq1])
            print(allfacts_pw[iq2])

            if ptrend is None:
                ptrend = TrendReq(hl='en-UK', tz=0)

            if qstring not in global_dict3:
                global_dict3[qstring] = {
                    'data': relative_data(qtmp, ptrend, trange=trange,
                                          loc=loc),
                    'factor': 1.0
                }

                sl.save_obj(global_dict3, fname + cmp2ext)

            num = float(max(global_dict3[qstring]['data'][qtmp[1]]))
            denom = float(max(global_dict3[qstring]['data'][qtmp[0]]))

            # NOTE(review): a zero numerator maps to 100.0 and a zero
            # denominator to 0.01, the opposite of the limits of num/denom.
            # Left unchanged -- confirm whether this is intended.
            if num == 0.0:
                print('Warning: 0 reached for second value in drop factor...')
                frac_drop = 100.0
            elif denom == 0.0:
                print('Warning: 0 reached for initial value in drop factor...')
                frac_drop = 0.01
            else:
                frac_drop = num / denom

            allfacts_pw[iq2] = allfacts_pw[iq1] * frac_drop

            # Ratios outside [0.01, 100] are treated as fatal.
            if frac_drop > 100. or frac_drop < 0.01:
                print(
                    'Error: pairwise comparison yielded a drop fraction greater than 100'
                )
                print('Comparison terms: {0} vs. {1}'.format(qtmp[0], qtmp[1]))
                print('\n**********\nData 1:',
                      global_dict3[qstring]['data'][qtmp[0]])
                print('\n**********\nData 2:',
                      global_dict3[qstring]['data'][qtmp[1]])
                exit()

        iqsrt_prev = copy.copy(iqsrt)
        iqsrt = sort_queries(copy.copy(iqueries), allfacts_pw)
        print(iqsrt)
        print(iqsrt_prev)

        print('\n***************')
        print('Query   -   Factor')
        notSorted = False

        for iiq, iq in enumerate(iqsrt):
            print('%d. %s :      %.2e    %d' %
                  (iiq, allquers[iq], allfacts_pw[iq] / allfacts_pw_prev[iq],
                   iqsrt_prev[iq]))
            # Any factor that moved by more than tol forces another sweep.
            if abs((allfacts_pw[iq] - allfacts_pw_prev[iq]) /
                   allfacts_pw[iq]) > tol:
                notSorted = True

        ipair += 1

        # Fail only if the sweep budget ran out while still unconverged.
        if notSorted and ipair >= MaxLoop:
            print(
                'Error: reached maximum number of sweeps for pairwise comparison without success'
            )
            exit()

    print('Pairwise sweep complete..')
    # NOTE(review): assuming `exit` is the interpreter's built-in, this raises
    # SystemExit, so the write-back below never runs.  Confirm whether this
    # is a leftover debugging stop before removing it.
    exit()

    for iq in range(len(allquers)):
        global_dict[allquers[iq]]['factor'] = allfacts_pw[iq]
        global_dict[allquers[iq]]['f_approx'] = False

    print('Data comparison after pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    sl.save_obj(global_dict, fname)

    return global_dict, global_dict3
Exemplo n.º 10
0
	def save(self):
		"""Store this object via ``sl.save_obj`` under ``self.name + '.gtr'``.

		The ``obj`` directory is created first when it is not already there.
		"""
		have_dir = os.path.isdir('obj')
		if not have_dir:
			os.makedirs('obj')

		out_name = self.name+'.gtr'
		sl.save_obj(self, out_name)