Exemplo n.º 1
0
def get_grid(grid_size, data_sample, grid, nr_words):
	"""Attach the remaining words to blobs and place every word on the final grid.

	The per-letter co-occurrence files (``a`` .. ``z``) located below
	``input_directory_cooc`` are read line by line.  Every line is split on
	``;``: the first field is the word, the remaining fields are handed to
	``add_to_sim_word``.  A word that is not already listed in ``grid[2]`` is
	appended to ``grid[3][best_index]``; a word without any further field is
	reported and subtracted from ``nr_words``.

	Afterwards the layout is computed according to the module-level
	``space_to_grid_type``:

	* ``"new"`` delegates to ``puzzle_grid.space_to_grid``.
	* ``"balls"`` / ``"stripy"`` build a 2-D point cloud (one point per blob
	  word, the assigned words jittered around it), write ``color_file.txt``
	  and three PDF plots to ``output_directory`` and run the matching
	  ``space_to_grid_iterative`` implementation.

	In both cases the module-level ``grid_f``, ``global_index`` and
	``global_name`` containers are filled with ``GridElem`` objects.

	Args:
		grid_size: Side length of the initial blob grid.
		data_sample: Passed through unchanged to ``add_to_sim_word``.
		grid: Indexable with four entries; ``grid[0]`` / ``grid[1]`` hold one
			coordinate each per blob word, ``grid[2]`` the blob words and
			``grid[3]`` one list of assigned words per blob word.  ``grid[3]``
			is extended in place.
		nr_words: Expected total number of words; reduced for every word that
			has no co-occurrence fields.

	Returns:
		Tuple ``(new_grid_size, nr_words)`` with the side length of the final
		grid and the corrected word count.

	Raises:
		ValueError: If ``space_to_grid_type`` is not one of ``"new"``,
			``"balls"`` or ``"stripy"`` (previously this surfaced as an
			``UnboundLocalError`` at the ``return`` statement).
	"""
	input_template = input_directory_cooc + r"\complete_cooc\_"

	non_blob_word_counter = 0
	word_counter = 0
	print("at file")
	for letter in "abcdefghijklmnopqrstuvwxyz":
		try:
			print(letter)
			# The context manager closes the file even when reading fails.
			with open(input_template + letter + ".txt", 'r') as f:
				# line_nr is the 0-based line index; it used to stay at 0
				# forever, which made the diagnostic below useless.
				for line_nr, line in enumerate(f):
					fields = line.replace(";\n", "").split(";")
					word = fields[0]

					word_counter += 1
					if word not in grid[2]:
						neighbours = fields[1:]
						if not neighbours:
							# Nothing to compare the word with: drop it.
							print(word, "at line", line_nr, "in file", letter)
							nr_words -= 1
						else:
							non_blob_word_counter += 1
							best_index = add_to_sim_word(word, neighbours, data_sample, grid)
							grid[3][best_index].append(word)
		except IOError:
			print("file", letter, "not found")
	print("\n New nr words:", nr_words)
	print("nr non blob words actually encountered", non_blob_word_counter, "all actually seen words", word_counter)
	# Drop the local reference; the sample is not needed past this point.
	del data_sample

	colors = get_colors()
	write_blob_file(grid)

	if space_to_grid_type == "new":
		assignment = puzzle_grid.space_to_grid(grid, colors, nr_words, output_directory, grid_size)
		new_grid_size = math.ceil(math.sqrt(nr_words))
		index = 0
		for i in range(new_grid_size):
			for j in range(new_grid_size):
				cell = assignment[i][j]
				if cell is not None:
					grid_f[i][j] = GridElem(index, [i, j], cell.name, cell.color, cell.blob_nr)
					global_index[index] = grid_f[i][j]
					global_name[grid_f[i][j].name] = grid_f[i][j]
					index += 1

	elif space_to_grid_type == "balls" or space_to_grid_type == "stripy":
		enlarge = 5
		new_grid_size = math.ceil(math.sqrt(nr_words)) + enlarge
		ratio = new_grid_size / float(grid_size)
		ratio_resize = ratio / scale_factor
		# shift_resize = (new_grid_size - grid_size*ratio_resize)/2 + ratio_resize
		shift_resize = (new_grid_size - grid_size * ratio_resize) / 2 + ratio_resize / 2
		print("shift", shift_resize)
		data_space = np.zeros((nr_words, 2))
		index = 0

		for i in range(len(grid[0])):
			# grid[0] is mirrored against grid_size - 1 before it is used as
			# the second coordinate (presumably row index -> y axis; confirm).
			x = grid[1][i]
			y = grid_size - 1 - grid[0][i]
			new_pos = np.array([x, y]) * ratio_resize + shift_resize
			data_space[index, :] = new_pos
			global_index[index] = [grid[2][i], i]
			assigned = len(grid[3][i])
			index += 1
			# Start every assigned word at the blob position, then jitter it
			# uniformly within +/- ratio_resize / 2 in both directions.
			data_space[index:index + assigned, 0].fill(new_pos[0])
			data_space[index:index + assigned, 1].fill(new_pos[1])
			random_dist = np.random.rand(assigned, 2)
			data_space[index:index + assigned, :] = (random_dist - 0.5) * ratio_resize + data_space[index:index + assigned, :]

			for elem in grid[3][i]:
				global_index[index] = [elem, i]
				index += 1

		print("data space shape", data_space.shape)
		print("last index = ", index - 1)
		used_marker = "o"
		if nr_words > 1000:
			used_marker = "."
		coloring = []
		color_index = {}
		print("COLOR FILE")
		with open(output_directory + r"\color_file.txt", "w") as color_file:
			for i in range(nr_words):
				# Colours are reused cyclically when there are more blobs
				# than colours.
				color = colors[global_index[i][1] % len(colors)]
				coloring.append(color)
				color_index[i] = color
				color_file.write(str(i) + ";" + str(color_index[i]) + "\n")

		################
		# Sanity plot: only the first and the last point, labelled with the
		# first and last entry of grid[2].
		image_name = output_directory + r"\test_grid_coloring.pdf"
		fig = plt.figure()
		plt.plot(data_space[0, 0], data_space[0, 1], c=coloring[0], marker='o')
		plt.text(data_space[0, 0] + 1, data_space[0, 1], grid[2][0])
		n = data_space.shape[0] - 1
		plt.plot(data_space[n, 0], data_space[n, 1], c=coloring[n], marker='o')
		plt.text(data_space[n, 0] + 1, data_space[n, 1], grid[2][len(grid[2]) - 1])
		plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
		plt.title("BLOB TEST")
		fig.savefig(image_name, bbox_inches='tight')
		plt.close()
		################

		image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"
		fig = plt.figure(figsize=(figure_size, figure_size))
		# fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
		prop_plot = plt.scatter(data_space[:, 0], data_space[:, 1], c=coloring, marker=used_marker)
		if nr_words > 1000:
			prop_plot.set_edgecolor("none")
		plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
		plt.title("Initial blob coloring")
		fig.savefig(image_name, bbox_inches='tight')
		plt.close()

		print("new grid size", new_grid_size)
		# NOTE(review): old_grid_size is not defined inside this function; it
		# is presumably a module-level global (grid_size may have been meant).
		print("old grid size", old_grid_size)
		# Raw string: "\i" is not a valid escape sequence.
		intermediate_grids = output_directory + r"\intermediate_grids"
		if not os.path.exists(intermediate_grids):
			os.makedirs(intermediate_grids)
			print("directory made")
		if space_to_grid_type == "stripy":
			assignment, gz = stg.space_to_grid_iterative(data_space, intermediate_grids, log_memory, with_figures=False, blob_nr_keeper=stg.TypeKeeper(color_index), scale=False, grid_enlarge=enlarge)
		else:
			assignment, gz = stg_nice_try.space_to_grid_iterative(data_space, intermediate_grids, log_memory, with_figures=False, blob_nr_keeper=stg.TypeKeeper(color_index), scale=False)
		del data_space

		print("init final grid and make figure of grid")
		image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
		# fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
		fig = plt.figure(figsize=(figure_size, figure_size))
		for elem in assignment:
			# elem is indexed as (first coordinate, second coordinate, point
			# index); the second coordinate is mirrored to obtain the row.
			row = int(new_grid_size) - elem[1] - 1
			column = elem[0]
			# print("elem", elem, "row", row, "column", column)
			grid_f[row][column] = GridElem(elem[2], [row, column], global_index[elem[2]][0], color_index[elem[2]], global_index[elem[2]][1])
			global_index[elem[2]] = grid_f[row][column]
			global_name[grid_f[row][column].name] = grid_f[row][column]
			prop_plot = plt.scatter(elem[0], elem[1], c=color_index[elem[2]], marker=used_marker)
			if nr_words > 1000:
				prop_plot.set_edgecolor("none")
		plt.axis([-1, new_grid_size, -1, new_grid_size])
		plt.title("Resulting blob coloring")
		fig.savefig(image_name, bbox_inches='tight')
		plt.close()

	else:
		# Without this branch new_grid_size would be unbound at the return.
		raise ValueError("unsupported space_to_grid_type: %r" % (space_to_grid_type,))

	print("Initial grid returns")
	return new_grid_size, nr_words
Exemplo n.º 2
0
def space_to_grid(reduced_2):
    """Run the iterative space-to-grid step on ``reduced_2``.

    ``reduced_2`` is forwarded, together with the module-level
    ``result_path``, to ``stg.space_to_grid_iterative``; whatever that call
    returns is handed back unchanged.
    """
    grid_assignment = stg.space_to_grid_iterative(reduced_2, result_path)
    return grid_assignment
Exemplo n.º 3
0
def get_grid(grid_size, data_sample, grid, nr_words):
    input_template = input_directory_cooc + r"\complete_cooc\_"

    if data_case_name == r"\limit1000_freq1_small_sample":
        input_template = r"D:\Users\Lydia\results word cooc\limit1000_freq1\complete_cooc\_"

    print "at file",
    for letter in "abcdefghijklmnopqrstuvwxyz":
        try:
            print letter,
            f = open(input_template + letter + ".txt", 'r')
            line_nr = 0
            for line in f:
                line = line.replace(";\n", "")
                line = line.split(";")
                word = line[0]
                if word not in grid[2]:
                    del line[0]
                    if len(line) == 0:
                        print word, "at line", line_nr, "in file", letter
                        nr_words -= 1
                    else:
                        best_index = add_to_sim_word(word, line, data_sample,
                                                     grid)
                        grid[3][best_index].append(word)
            f.close()
        except IOError:
            print letter,
    print "\n New nr words:", nr_words
    del data_sample

    new_grid_size = math.ceil(math.sqrt(nr_words))
    ratio = new_grid_size / float(grid_size)
    ratio_resize = ratio / 1.2
    shift_resize_x = (new_grid_size -
                      grid_size * ratio_resize) / 2 + ratio_resize
    shift_resize_y = (new_grid_size -
                      grid_size * ratio_resize) / 2 + ratio_resize
    data_space = np.zeros((nr_words, 2))
    index = 0
    blob_file = open(output_directory + r"\blob_file.txt", "w")
    for i in range(len(grid[0])):
        new_pos = np.array([
            round(grid[0][i] * ratio_resize + shift_resize_x),
            round(grid[1][i] * ratio_resize + shift_resize_y)
        ])
        data_space[index, :] = new_pos
        global_index[index] = [grid[2][i], i]
        index += 1
        assigned = len(grid[3][i])
        data_space[index:index + assigned, 0].fill(new_pos[0])
        data_space[index:index + assigned, 1].fill(new_pos[1])
        random_dist = np.random.rand(assigned, 2)
        data_space[index:index + assigned, :] = (
            random_dist - 0.5) * ratio_resize + data_space[index:index +
                                                           assigned, :]
        blob_file.write(str(i) + " " + grid[2][i] + " " + str(len(grid[3][i])))
        for elem in grid[3][i]:
            global_index[index] = [elem, i]
            index += 1
            blob_file.write(" " + elem)
        blob_file.write("\n")
    blob_file.close()

    print "data space shape", data_space.shape
    print "last index = ", index - 1
    used_marker = "o"
    if nr_words > 1000:
        used_marker = "."
    colors = get_colors()
    coloring = []
    color_index = {}
    for i in range(nr_words):
        coloring.append(colors[global_index[i][1] % len(colors)])
        color_index[i] = colors[global_index[i][1] % len(colors)]

    image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"
    fig = plt.figure(figsize=(figure_size, figure_size))
    # fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
    prop_plot = plt.scatter(data_space[:, 1],
                            new_grid_size - 1 - data_space[:, 0],
                            c=coloring,
                            marker=used_marker)
    if nr_words > 1000:
        prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size + 1, -1, new_grid_size + 1])
    plt.title("Initial blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()

    print "new grid size", new_grid_size
    intermediate_grids = output_directory + "\intermediate_grids"
    if not os.path.exists(intermediate_grids):
        os.makedirs(intermediate_grids)
        print "directory made"
    assignment, gz = stg.space_to_grid_iterative(
        data_space,
        intermediate_grids,
        with_figures=False,
        blob_nr_keeper=stg.TypeKeeper(color_index))
    del data_space

    print "init final grid and make figure of grid"
    image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
    # fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
    fig = plt.figure(figsize=(figure_size, figure_size))
    for elem in assignment:
        grid_f[elem[0]][elem[1]] = GridElem(elem[2], elem[0:2],
                                            global_index[elem[2]][0],
                                            color_index[elem[2]],
                                            global_index[elem[2]][1])
        global_index[elem[2]] = grid_f[elem[0]][elem[1]]
        global_name[grid_f[elem[0]][elem[1]].name] = grid_f[elem[0]][elem[1]]
        prop_plot = plt.scatter(elem[1],
                                new_grid_size - 1 - elem[0],
                                c=color_index[elem[2]],
                                marker=used_marker)
        if nr_words > 1000:
            prop_plot.set_edgecolor("none")
    plt.axis([-1, new_grid_size, -1, new_grid_size])
    plt.title("Resulting blob coloring")
    fig.savefig(image_name, bbox_inches='tight')
    plt.close()

    return new_grid_size, nr_words
Exemplo n.º 4
0
def get_grid(grid_size, data_sample, grid, nr_words):
	input_template = input_directory_cooc + r"\complete_cooc\_"
	
	if data_case_name == r"\limit1000_freq1_small_sample":
		input_template = r"D:\Users\Lydia\results word cooc\limit1000_freq1\complete_cooc\_"
	
	print "at file",
	for letter in "abcdefghijklmnopqrstuvwxyz":
		try:
			print letter,
			f = open(input_template+letter+".txt",'r')		
			line_nr = 0
			for line in f:
				line = line.replace(";\n", "")
				line = line.split(";")
				word = line[0]
				if word not in grid[2]:
					del line[0]
					if len(line) == 0:
						print word, "at line", line_nr, "in file", letter
						nr_words -= 1
					else:
						best_index = add_to_sim_word(word, line, data_sample, grid)		
						grid[3][best_index].append(word)
			f.close()
		except IOError:
			print letter, 
	print "\n New nr words:", nr_words
	del data_sample
		
		
	new_grid_size = math.ceil(math.sqrt(nr_words))
	ratio = new_grid_size/float(grid_size)	
	ratio_resize = ratio/1.2
	shift_resize_x = (new_grid_size - grid_size*ratio_resize)/2 + ratio_resize
	shift_resize_y = (new_grid_size - grid_size*ratio_resize)/2 + ratio_resize
	data_space = np.zeros((nr_words,2))
	index = 0
	blob_file = open(output_directory + r"\blob_file.txt", "w")
	for i in range(len(grid[0])):
		new_pos = np.array([ round(grid[0][i] * ratio_resize + shift_resize_x),round(grid[1][i] * ratio_resize + shift_resize_y)])
		data_space[index,:] = new_pos
		global_index[index] = [grid[2][i],i]
		index+=1		
		assigned = len(grid[3][i])
		data_space[index:index+assigned,0].fill(new_pos[0])
		data_space[index:index+assigned,1].fill(new_pos[1])
		random_dist = np.random.rand(assigned,2)
		data_space[index:index+assigned,:] = (random_dist-0.5)*ratio_resize + data_space[index:index+assigned,:]
		blob_file.write(str(i) + " " + grid[2][i] + " " + str(len(grid[3][i]))) 
		for elem in grid[3][i]:
			global_index[index] = [elem, i]
			index+=1
			blob_file.write(" " + elem)
		blob_file.write("\n")
	blob_file.close()		
	
	print "data space shape", data_space.shape
	print "last index = ", index-1 
	used_marker = "o"
	if nr_words > 1000:
		used_marker = "."	
	colors = get_colors()		
	coloring = []
	color_index = {}
	for i in range(nr_words):
		coloring.append( colors[global_index[i][1]%len(colors)] )	
		color_index[i] = colors[global_index[i][1]%len(colors)]
		
	image_name = output_directory + r"\stg_init_plot_blobColoring.pdf"	
	fig = plt.figure(figsize=(figure_size, figure_size))
	# fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
	prop_plot = plt.scatter( data_space[:,1], new_grid_size-1-data_space[:,0], c=coloring, marker=used_marker)
	if nr_words > 1000:
		prop_plot.set_edgecolor("none")
	plt.axis([-1,new_grid_size+1, -1, new_grid_size+1])
	plt.title("Initial blob coloring")
	fig.savefig(image_name, bbox_inches='tight')
	plt.close()
	
	print "new grid size", new_grid_size
	intermediate_grids = output_directory + "\intermediate_grids" 
	if not os.path.exists(intermediate_grids):
		os.makedirs(intermediate_grids)	
		print "directory made"
	assignment, gz = stg.space_to_grid_iterative(data_space, intermediate_grids, with_figures=False, blob_nr_keeper=stg.TypeKeeper(color_index))
	del data_space
	
	print "init final grid and make figure of grid"
	image_name = output_directory + r"\stg_result_plot_blobColoring.pdf"
	# fig = plt.figure(figsize=(figure_size, figure_size), dpi=figure_dpi)
	fig = plt.figure(figsize=(figure_size, figure_size))
	for elem in assignment:
		grid_f[elem[0]][elem[1]] =  GridElem(elem[2], elem[0:2], global_index[elem[2]][0], color_index[elem[2]], global_index[elem[2]][1] )
		global_index[elem[2]] = grid_f[elem[0]][elem[1]]
		prop_plot = plt.scatter(elem[1], new_grid_size-1-elem[0], c=color_index[elem[2]] , marker=used_marker)
		if nr_words > 1000:
			prop_plot.set_edgecolor("none")
	plt.axis([-1,new_grid_size, -1, new_grid_size])
	plt.title("Resulting blob coloring")
	fig.savefig(image_name, bbox_inches='tight')	
	plt.close()
	
	return new_grid_size, nr_words
Exemplo n.º 5
0
def space_to_grid(reduced_2):
	"""Run the iterative space-to-grid step on ``reduced_2``.

	``reduced_2`` is forwarded, together with the module-level
	``result_path``, to ``stg.space_to_grid_iterative``; whatever that call
	returns is handed back unchanged.
	"""
	grid_assignment = stg.space_to_grid_iterative(reduced_2, result_path)
	return grid_assignment