def extractFullMIToThesaurus():
	"""Write the T3 Jaccard thesaurus XML from the full-corpus MI statistics.

	For every seed returned by ``Seeds().getSeeds()`` the related nouns are
	looked up with ``getOrderedNounMIForTerm``. Seeds whose lookup returns
	``False`` are skipped; for the others a ``<seed>`` element is written
	holding at most ``Parameters().getMaxQtyTerms()`` ``<term>`` children.

	The process is terminated with ``sys.exit()`` when the output file cannot
	be opened. The output file is always closed, even if a lookup or a write
	raises.
	"""
	accents = Accents()
	parameters = Parameters()
	max_qty_terms = parameters.getMaxQtyTerms()
	seeds = Seeds()
	dic_seeds = seeds.getSeeds()
	mi_file = Statistic(stat_temp+'IMT_FullStatisticalCorpus.txt')

	try:
		thesaurus_file = codecs.open('../Data/Output/T3/T3_Jaccard.xml', 'w', 'utf-8')
	except IOError:
		print('ERROR: System cannot open the  file ../Data/Output/T3/T3_Jaccard.xml')
		sys.exit()

	try:
		# NOTE(review): the XML declaration announces ISO-8859-1 while the
		# stream is opened as UTF-8. Left unchanged because consumers of the
		# file are not visible here -- confirm which encoding is intended.
		thesaurus_file.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n<thesaurus>\n\t<ontology id="privacy">\n')
		for seed in dic_seeds:
			dic_related = mi_file.getOrderedNounMIForTerm(seed)
			# The lookup signals "no data" with False; an empty result still
			# produces an (empty) <seed> element, as before.
			if dic_related != False:
				thesaurus_file.write('\t\t<seed term_id="" term_name="'+accents.buildAccents(seed)+'" type="">\n')
				for qty_terms, mi_related in enumerate(dic_related):
					# Stop as soon as the limit is reached instead of
					# walking the rest of the related terms for nothing.
					if qty_terms >= max_qty_terms:
						break
					# mi_related[0] is the similarity, mi_related[1] the term.
					thesaurus_file.write('\t\t\t<term id="" display="ON" similarity="'+mi_related[0]+'">'+accents.buildAccents(mi_related[1])+'</term>\n')
				thesaurus_file.write('\t\t</seed>\n')
		thesaurus_file.write('\t</ontology>\n</thesaurus>')
	finally:
		thesaurus_file.close()
Exemplo n.º 2
0
	def __init__(self, temp_folder, file_input, seedfile, mi_precision):
		"""Set up the instance state and build the MI data from `file_input`.

		temp_folder  -- stored as-is in `self.temp_folder`.
		file_input   -- input file name; also the source of `self.window_size`.
		seedfile     -- handed to `Seeds` to load `self.list_seeds`.
		mi_precision -- forwarded untouched to `__buildMI__`.
		"""
		# Drop the first character and the last 23 characters of the name.
		# NOTE(review): presumably this leaves the window size of a
		# 'W<size>_...' file name -- confirm against the callers.
		self.window_size = file_input[1:-23]
		self.temp_folder = temp_folder
		self.misc = Miscelaneous()
		self.list_seeds = Seeds(seedfile).getSeeds()

		# Containers start empty.
		self.first_line = ''
		self.dic_tuplas = defaultdict(dict)
		self.dic_terms = OrderedDict()

		self.__buildMI__(file_input, mi_precision)
Exemplo n.º 3
0
	def __init__(self, seedfile, temp_folder, sim_measure):
		"""Create the empty lookup tables and build them for `sim_measure`.

		seedfile    -- handed to `Seeds` to load `self.list_seeds`.
		temp_folder -- stored as-is in `self.temp_folder`.
		sim_measure -- forwarded untouched to `__buildHashs__`.
		"""
		self.misc = Miscelaneous()
		seed_source = Seeds(seedfile)
		self.temp_folder = temp_folder

		# Flat tables.
		self.dic_nouns = {}
		self.dic_freqObj = {}
		self.dic_Obj = {}

		# Two-level tables: a missing first-level key yields a fresh dict.
		self.dic_seeds = defaultdict(dict)
		self.dic_measure = defaultdict(dict)
		self.dic_Obj2 = defaultdict(dict)

		self.list_seeds = seed_source.getSeeds()

		self.__buildHashs__(sim_measure)
Exemplo n.º 4
0
    def Generate_Neighbor_Groups(self):
        """Return the neighbor groups for the current seed type.

        * ``seed_type == 1``: every entry of the seed range produced by
          ``Seeds.Generate_Seed_Range`` is joined into a string; entries are
          emitted from the last one to the first.
        * ``seed_type == 2``: for each index of ``self.seed``, from the last
          to the first, ``index / linear_kernel_length`` rounded to
          ``Grid.round_value`` digits.
        * any other value: an empty list.

        ``self.seed_length`` is set to the length of the returned list.
        """
        neighbor_groups = []
        if self.seed_type == 1:
            seed_factory = Seeds()
            seed_factory.Config(self.seed_type, self.max_cell_value,
                                self.linear_kernel_length)
            # only send parameter in this case
            seed_range = seed_factory.Generate_Seed_Range(
                0, (self.max_cell_value**self.linear_kernel_length),
                self.linear_kernel_length)
            neighbor_groups = [
                "".join(str(cell) for cell in seed_range[idx])
                for idx in reversed(range(len(seed_range)))
            ]
        elif self.seed_type == 2:
            neighbor_groups = [
                round((float(idx) / self.linear_kernel_length),
                      Grid.round_value)
                for idx in reversed(range(len(self.seed)))
            ]
        self.seed_length = len(neighbor_groups)
        return neighbor_groups
Exemplo n.º 5
0
	def __init__(self, ctx_freq_file, seedfile):
		"""Create the empty tables and fill them through `__buildHashs__`.

		ctx_freq_file -- forwarded untouched to `__buildHashs__`.
		seedfile      -- handed to `Seeds` to load `self.list_seeds`, and also
		                 forwarded to `__buildHashs__`.
		"""
		self.misc = Miscelaneous()
		self.list_seeds = Seeds(seedfile).getSeeds()

		# Two-level tables, created empty; a missing first-level key yields a
		# fresh dict.
		for table_name in (
			'dic_baseline', 'dic_diceBin', 'dic_diceMin', 'dic_jaccard',
			'dic_cosineBin', 'dic_cosine', 'dic_city', 'dic_euclidean',
			'dic_js', 'dic_lin', 'dic_jaccardMax', 'dic_ctx',
		):
			setattr(self, table_name, defaultdict(dict))

		# Flat tables.
		self.dic_sum_freq_noun = {}
		self.dic_qty_noun = {}

		self.__buildHashs__(ctx_freq_file, seedfile)
Exemplo n.º 6
0
	def buildSTRelations(self, file_input, seeds_file):
		"""Aggregate bigram frequencies into the W<window>_Relations.txt file.

		file_input -- name of the bigram file inside `self.temp_folder`.
		              The first line is skipped; every other line is read as
		              'term1__type1<>term2__type2<>freq_pair freq1 freq2'.
		seeds_file -- handed to `Seeds`; the seed list itself is not used here.

		For each line whose two terms differ, the pair frequency is added to
		the key 'term2#term1#' when type1 is 'N' and to 'term1#term2#' when
		type2 is 'N'. Every key is then written, immediately followed by its
		total, one per line.

		Both files are closed even when parsing or writing raises.
		"""
		# The seeds are loaded as before although the result is unused.
		# NOTE(review): kept in case loading validates the seed file --
		# confirm and drop the call if it has no side effects.
		Seeds(seeds_file).getSeeds()

		dic_tuplas = {}
		file_bigrams = self.misc.openFile(self.temp_folder+''+file_input, 'r')
		try:
			header_skipped = False
			for line in file_bigrams:
				if not header_skipped:
					header_skipped = True
					continue

				part = line.split('<>')
				term1, type1 = part[0].split('__')
				term2, type2 = part[1].split('__')
				# Only the first field (pair frequency) is needed; the two
				# per-term frequencies that follow it are ignored.
				freq_tupla = part[2].split(' ')[0]

				if term1 == term2:
					continue
				if type1 == 'N':
					key = term2+'#'+term1+'#'
					dic_tuplas[key] = dic_tuplas.get(key, 0) + int(freq_tupla)
				if type2 == 'N':
					key = term1+'#'+term2+'#'
					dic_tuplas[key] = dic_tuplas.get(key, 0) + int(freq_tupla)
		finally:
			file_bigrams.close()

		file_relations = self.misc.openFile(self.temp_folder+'W'+str(self.window_size)+'_Relations.txt', 'w')
		try:
			for tupla in dic_tuplas:
				# The key already ends with '#', so the count follows directly.
				file_relations.write(tupla+''+str(dic_tuplas[tupla])+'\n')
		finally:
			file_relations.close()
def buildToLinguaToolkit():
	"""Write every seed/noun pair and run the external similarity pipeline.

	The nouns are the third '#'-separated field of each key in `dic_an`,
	`dic_sv` and `dic_vo`. For every noun and every seed that differ, a
	'seed#noun' line is written to seedsToRelated.txt in `temp_folder`.
	The file is then piped through `measures.perl` and `gawk`, and the result
	is stored in Similarities.txt.

	The process is terminated with `sys.exit()` when the pair file cannot be
	opened.
	"""
	try:
		seeds_to_related_file = codecs.open(temp_folder+'seedsToRelated.txt', 'w', 'utf-8')
	except IOError:
		print('ERROR: System cannot open the '+temp_folder+'seedsToRelated.txt file')
		sys.exit()

	try:
		seeds_file = Seeds()
		dic_seeds = seeds_file.getSeeds()

		# A dict (noun -> noun) is kept rather than a set so that the
		# iteration order, and therefore the output order, is unchanged.
		dic_noun = {}
		for dic_relations in (dic_an, dic_sv, dic_vo):
			for terms in dic_relations:
				noun = terms.split('#')[2]
				dic_noun[noun] = noun

		for noun in dic_noun:
			for seed in dic_seeds:
				if noun != seed:
					seeds_to_related_file.write(seed+'#'+noun+'\n')
	finally:
		# The file must be closed (and so flushed) before the shell pipeline
		# below reads it; otherwise `cat` may see a truncated file.
		seeds_to_related_file.close()

	command = "cat "+temp_folder+"seedsToRelated.txt | perl measures.perl "+temp_folder+"tempMergedFiles.txt 1 | gawk '{print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13}' > "+temp_folder+"Similarities.txt"
	os.system(command)
import re
import codecs

from collections import defaultdict
from StatisticalCorpus import StatisticalCorpus
from Parameters import Parameters
from Seeds import Seeds
from Accents import Accents

# Working directories, given as relative paths.
temp_folder = '../Temp/'
stat_corpus = '../Data/Corpus/Statistical/'
stat_temp = temp_folder+'Statistical/'
output_folder = '../Data/Output/'
# Shared helper objects and values, created once when the module is loaded.
parameters = Parameters()
max_qty_terms = parameters.getMaxQtyTerms()
seeds = Seeds()
list_seeds = seeds.getSeeds()
accents = Accents()

def mainscript():
	"""Run the whole pipeline for the 'Full' and then the 'Noun' corpus.

	Order of the steps: build the statistical corpus, run the mutual
	information step for both corpus types, then extract the thesaurus for
	both corpus types.
	"""
	StatisticalCorpus()
	corpus_types = ('Full', 'Noun')
	for corpus_type in corpus_types:
		executeMutualInformation(corpus_type)
	for corpus_type in corpus_types:
		getThesaurusFromSeeds(corpus_type)

def executeMutualInformation(typefile):
	command = 'count.pl --ngram 2 --window '+str(parameters.getWindowSize())+' '+stat_temp+'W'+str(parameters.getWindowSize())+'_'+typefile+'StatisticalCorpus.txt '+stat_corpus+''+typefile+'StatisticalCorpus.txt'
	os.system(command)

	try:
Exemplo n.º 9
0
    def Setup_Seeds(self, seed_group, rule_type, base, kernel, first_seeds,
                    remaining_seeds):
        """Store the seed configuration and build one seed list per stitch.

        Args:
            seed_group: Loaded seeds; indexed and measured with ``len`` when
                ``first_seeds`` is ``'loaded'``.
            rule_type: Stored and forwarded to ``Seeds.Config``.
            base: Number base; forwarded to ``Seeds.Config`` and
                ``Seeds.Convert``.
            kernel: Passed to ``Kernel(...)``; the result is kept as
                ``self.kernel``.
            first_seeds: ``'random'`` or ``'loaded'``.
            remaining_seeds: With ``'random'`` first seeds: ``'random'`` or
                ``'ordered'``. With ``'loaded'`` first seeds:
                ``'random loaded'``, ``'cyclic loaded'`` or ``'random'``
                (``'ordered'`` is recognised but not implemented).

        Side effects:
            Resets ``self.all_seeds`` and appends one entry per stitched
            simulation. Prints a message and calls ``quit()`` on an
            unsupported option combination, or when a mode that reads
            ``seed_group`` is requested while it is empty.
        """
        self.seed_group = seed_group
        self.rule_type = rule_type
        self.base = base
        self.kernel = Kernel(kernel)
        self.first_seed_option = first_seeds
        self.remaining_seed_option = remaining_seeds
        self.all_seeds = []
        self.seedControl = Seeds()
        self.seedControl.Config(self.rule_type, self.base,
                                self.kernel.linear_kernel_length)
        for S in range(self.stitched_simulation_count):
            current_stitch_seeds = []
            if self.first_seed_option == 'random':
                if self.remaining_seed_option == 'random':
                    current_stitch_seeds = self.seedControl.Generate_Random_Seeds(
                        self.segment_count)

                elif self.remaining_seed_option == 'ordered':
                    random_start_seed = self.seedControl.Generate_Random_Seed()
                    # convert lower bound to base 10
                    lower_bound = int(
                        self.seedControl.Convert(self.base, random_start_seed))
                    # the range covers segment_count consecutive values
                    upper_bound = lower_bound + self.segment_count
                    current_stitch_seeds = self.seedControl.Generate_Seed_Range(
                        lower_bound, upper_bound, self.seedControl.seed_length)
                else:
                    print('Seed options incorrect')
                    quit()

            elif self.first_seed_option == 'loaded':
                # Every mode below that indexes seed_group needs at least one
                # loaded seed; report that clearly instead of failing with a
                # ZeroDivisionError on the modulo.
                needs_loaded_seeds = self.remaining_seed_option in (
                    'random loaded', 'cyclic loaded', 'random')
                if needs_loaded_seeds and len(self.seed_group) == 0:
                    print('\nNo seeds loaded.')
                    quit()

                if self.remaining_seed_option == 'random loaded':
                    current_stitch_seeds = [
                        self.seed_group[random.randint(
                            0,
                            len(self.seed_group) - 1)]
                        for x in range(self.segment_count)
                    ]

                elif self.remaining_seed_option == 'cyclic loaded':  #always starts with first seed
                    current_stitch_seeds = [
                        self.seed_group[x % len(self.seed_group)]
                        for x in range(self.segment_count)
                    ]

                elif self.remaining_seed_option == 'random':  #remaining seeds are completely random, not in loaded group
                    # NOTE(review): a single seed_group entry is treated here
                    # as a list of start seeds (len() and list concatenation),
                    # while the modes above treat an entry as one seed --
                    # confirm the intended shape of seed_group for this mode.
                    loaded_start_seeds = self.seed_group[S % len(
                        self.seed_group)]
                    random_remaining_group = self.seedControl.Generate_Random_Seeds(
                        self.segment_count - len(loaded_start_seeds))
                    current_stitch_seeds = loaded_start_seeds + random_remaining_group

                elif self.remaining_seed_option == 'ordered':
                    print(
                        'ordered remaining seeds is not implemented at this time'
                    )
                    quit()
                else:
                    print('Seed options incorrect')
                    quit()
            else:
                print('Seed options incorrect')
                quit()

            self.all_seeds.append(current_stitch_seeds)
Exemplo n.º 10
0
class Stitched_Driver():

    # loads fundamental information for a group of stitched simulations
    def __init__(self, simulation_count, segment_count, width,
                 segment_heights):
        self.stitched_simulation_count = simulation_count
        self.segment_count = segment_count
        self.width = width
        self.loaded_segment_heights = segment_heights
        self.segment_steps = segment_heights

    # loads information regarding seeds for each simulation
    # handles all types of options for first seed and remaining seeds of each image
    def Setup_Seeds(self, seed_group, rule_type, base, kernel, first_seeds,
                    remaining_seeds):
        """Store the seed configuration and build one seed list per stitch.

        Args:
            seed_group: Loaded seeds; indexed and measured with ``len`` when
                ``first_seeds`` is ``'loaded'``.
            rule_type: Stored and forwarded to ``Seeds.Config``.
            base: Number base; forwarded to ``Seeds.Config`` and
                ``Seeds.Convert``.
            kernel: Passed to ``Kernel(...)``; the result is kept as
                ``self.kernel``.
            first_seeds: ``'random'`` or ``'loaded'``.
            remaining_seeds: With ``'random'`` first seeds: ``'random'`` or
                ``'ordered'``. With ``'loaded'`` first seeds:
                ``'random loaded'``, ``'cyclic loaded'`` or ``'random'``
                (``'ordered'`` is recognised but not implemented).

        Side effects:
            Resets ``self.all_seeds`` and appends one entry per stitched
            simulation. Prints a message and calls ``quit()`` on an
            unsupported option combination, or when a mode that reads
            ``seed_group`` is requested while it is empty.
        """
        self.seed_group = seed_group
        self.rule_type = rule_type
        self.base = base
        self.kernel = Kernel(kernel)
        self.first_seed_option = first_seeds
        self.remaining_seed_option = remaining_seeds
        self.all_seeds = []
        self.seedControl = Seeds()
        self.seedControl.Config(self.rule_type, self.base,
                                self.kernel.linear_kernel_length)
        for S in range(self.stitched_simulation_count):
            current_stitch_seeds = []
            if self.first_seed_option == 'random':
                if self.remaining_seed_option == 'random':
                    current_stitch_seeds = self.seedControl.Generate_Random_Seeds(
                        self.segment_count)

                elif self.remaining_seed_option == 'ordered':
                    random_start_seed = self.seedControl.Generate_Random_Seed()
                    # convert lower bound to base 10
                    lower_bound = int(
                        self.seedControl.Convert(self.base, random_start_seed))
                    # the range covers segment_count consecutive values
                    upper_bound = lower_bound + self.segment_count
                    current_stitch_seeds = self.seedControl.Generate_Seed_Range(
                        lower_bound, upper_bound, self.seedControl.seed_length)
                else:
                    print('Seed options incorrect')
                    quit()

            elif self.first_seed_option == 'loaded':
                # Every mode below that indexes seed_group needs at least one
                # loaded seed; report that clearly instead of failing with a
                # ZeroDivisionError on the modulo.
                needs_loaded_seeds = self.remaining_seed_option in (
                    'random loaded', 'cyclic loaded', 'random')
                if needs_loaded_seeds and len(self.seed_group) == 0:
                    print('\nNo seeds loaded.')
                    quit()

                if self.remaining_seed_option == 'random loaded':
                    current_stitch_seeds = [
                        self.seed_group[random.randint(
                            0,
                            len(self.seed_group) - 1)]
                        for x in range(self.segment_count)
                    ]

                elif self.remaining_seed_option == 'cyclic loaded':  #always starts with first seed
                    current_stitch_seeds = [
                        self.seed_group[x % len(self.seed_group)]
                        for x in range(self.segment_count)
                    ]

                elif self.remaining_seed_option == 'random':  #remaining seeds are completely random, not in loaded group
                    # NOTE(review): a single seed_group entry is treated here
                    # as a list of start seeds (len() and list concatenation),
                    # while the modes above treat an entry as one seed --
                    # confirm the intended shape of seed_group for this mode.
                    loaded_start_seeds = self.seed_group[S % len(
                        self.seed_group)]
                    random_remaining_group = self.seedControl.Generate_Random_Seeds(
                        self.segment_count - len(loaded_start_seeds))
                    current_stitch_seeds = loaded_start_seeds + random_remaining_group

                elif self.remaining_seed_option == 'ordered':
                    print(
                        'ordered remaining seeds is not implemented at this time'
                    )
                    quit()
                else:
                    print('Seed options incorrect')
                    quit()
            else:
                print('Seed options incorrect')
                quit()

            self.all_seeds.append(current_stitch_seeds)

    # sets up folder structure for image groups
    # creates seed log
    def Setup_Saving(self):
        """Prepare the output folder for this run and, for groups, log the seeds.

        Calls ``Setup_Data_Container`` and makes sure a ``WolframSimulations``
        folder exists under ``%USERPROFILE%`` / ``Desktop`` (the environment
        variable and the backslash separators make this Windows-specific).

        * More than one stitched simulation: a new numbered folder is created
          under ``Groups`` (one past the highest numeric prefix found there)
          and every seed of every stitch is appended to ``seed_maps.txt``
          inside it.
        * Otherwise: the ``Singles`` folder is created if needed and
          ``self.latest_file_index`` is set to one past the highest numeric
          prefix among the file names found there.

        Sets ``self.default_save_path`` and ``self.relative_save_path``.

        Raises:
            KeyError: If the ``USERPROFILE`` environment variable is not set.
            ValueError: If an existing folder or file name does not start
                with an integer followed by ``'_'``.
        """
        #create simulation data container
        self.Setup_Data_Container()
        #create group folder
        desktop = os.path.join(os.environ['USERPROFILE'], 'Desktop')
        desktop += '\\WolframSimulations'
        if not os.path.exists(desktop):
            os.makedirs(desktop)
        self.default_save_path = desktop
        self.relative_save_path = self.default_save_path
        print('Image Save Path:')
        if self.stitched_simulation_count > 1:
            self.latest_dir_index = 0
            self.relative_save_path += '\\Groups'
            # Walking bottom-up yields the top folder last, so after the loop
            # `dirs` holds its immediate sub-folders (or stays empty when the
            # folder does not exist yet).
            dirs = []
            for _root, dirs, _files in os.walk(self.relative_save_path,
                                               topdown=False):
                pass
            if len(dirs) > 0:
                # The index is the text before the first underscore.
                self.latest_dir_index = max(
                    [int(name[:name.find('_')]) for name in dirs])
            folder = (str(self.latest_dir_index + 1) + '_SC' +
                      self.display_info[0] + '-SG_' + self.display_info[1] +
                      '-RT_' + self.display_info[2] + '-B_' +
                      self.display_info[3])
            self.relative_save_path += '\\' + folder

            if not os.path.exists(self.relative_save_path):
                os.makedirs(self.relative_save_path)
            #save seed log to folder
            self.seed_log_file = open(
                self.relative_save_path + '\\seed_maps.txt', 'a')
            # The context manager closes the log even if a write fails.
            with self.seed_log_file as seed_log:
                for i in range(self.stitched_simulation_count):
                    seed_log.write('Stitch: ' + str(i + 1) + '\n')
                    for j in range(self.segment_count):
                        seed_log.write(
                            ''.join(map(str, self.all_seeds[i][j])) + '\n')

            print(self.relative_save_path)

        else:
            self.latest_file_index = 0
            self.relative_save_path += '\\Singles'
            print(self.relative_save_path)
            if not os.path.exists(self.relative_save_path):
                os.makedirs(self.relative_save_path)
            # Same bottom-up walk as above: the last entry describes the top
            # folder, so `file_names` ends up with the files directly in it.
            file_names = []
            for _root, _dirs, file_names in os.walk(self.relative_save_path,
                                                    topdown=False):
                pass
            if len(file_names) > 0:
                self.latest_file_index = max(
                    [int(name[:name.find('_')]) for name in file_names])
            self.latest_file_index += 1

    # sets up all initial rows for each simulation chain in each image
    def Setup_Initial_Conditions(self, start_type, start_sub_type, start_gap,
                                 start_groups, heights_setting):
        self.start_type = start_type
        self.start_sub_type = start_sub_type
        self.start_gap = start_gap
        self.start_groups = start_groups
        self.all_heights = []
        self.heights_setting = heights_setting
        if heights_setting == 'cyclic':
            constant_height_order = [
                self.loaded_segment_heights[x %
                                            len(self.loaded_segment_heights)]
                for x in range(self.segment_count)
            ]
            self.all_heights = [
                constant_height_order
                for x in range(self.stitched_simulation_count)
            ]
        elif heights_setting == 'random loaded':
            for i in range(self.stitched_simulation_count):
                random_loaded_height_order = [
                    self.loaded_segment_heights[random.randint(
                        0,
                        len(self.loaded_segment_heights) - 1)]
                    for x in range(self.segment_count)
                ]
                self.all_heights.append(random_loaded_height_order)
        elif heights_setting == 'random':
            #upper bounds are first and second heights entered
            try:
                for i in range(self.stitched_simulation_count):
                    random_height_order = [
                        random.randint(self.loaded_segment_heights[0],
                                       self.loaded_segment_heights[1])
                        for x in range(self.segment_count)
                    ]
                    self.all_heights.append(random_height_order)
            except:
                print('Please input upper and lower bounds for heights.')
                quit()

        else:
            print('Incorrect heights settings')
            quit()
        self.Print_Info()

    # runs simulations for each image, saves one at a time to prevent memory overload
    # runs each segment as individual simulation, calls 'Stitch_Simulations' to combine and save.
    def Run_Stitches(
        self,
        save_size,
        duplicate_saves,
    ):
        self.save_size = save_size
        for i in range(self.stitched_simulation_count):

            current_stitch_sims = []
            current_stitch_sims.append(
                Simulation(self.rule_type, self.base, self.all_heights[i][0],
                           self.width, self.all_heights[i][0], self.kernel))
            current_stitch_sims[0].Set_Seed(self.all_seeds[i][0])
            current_stitch_sims[0].Set_Initial_Condition(
                self.start_type, self.start_sub_type, self.start_gap,
                self.start_groups)
            for j in range(current_stitch_sims[0].height
                           ):  # - self.kernel.start_rows_needed):
                current_stitch_sims[0].Next_Step()

            #remaining simulations of stitch
            for j in range(1, self.segment_count):
                previous_end_rows = current_stitch_sims[j - 1].Get_Rows(
                    1, self.kernel.start_rows_needed)
                current_stitch_sims.append(
                    Simulation(self.rule_type, self.base,
                               self.all_heights[i][j], self.width,
                               self.all_heights[i][j], self.kernel))
                #should not need to send initial conditions
                current_stitch_sims[j].Set_Seed(self.all_seeds[i][j])
                current_stitch_sims[j].Insert_Rows_Top(previous_end_rows)
                for k in range(current_stitch_sims[j].height):
                    current_stitch_sims[j].Next_Step()

            self.Stitch_Simulations(
                i,
                current_stitch_sims,
                duplicate_saves,
            )
            print('Group Completion: ' +
                  str(float(i + 1) / self.stitched_simulation_count * 100) +
                  '%')

    # combines simulations into one large stitched simulation
    def Stitch_Simulations(
        self,
        stitch_index,
        current_stitch,
        duplicate_saves,
    ):
        """Merge the segments of one stitch into a single simulation and save it.

        The grid rows of every segment are collected, each grid being read
        from index ``height`` downwards, and replayed into a new simulation
        whose height is the sum of the segment heights. The result is shown
        when only one stitched simulation is configured, and always saved
        under ``self.relative_save_path``.

        Args:
            stitch_index: Zero-based stitch number; used (plus one) in the
                file name.
            current_stitch: The segment simulations; the first
                ``self.segment_count`` entries are used.
            duplicate_saves: Forwarded to ``Save_Current_Figure``.
        """
        #getting all grids for single stitch
        segment_grids = []
        total_height = 0
        for seg_idx in range(self.segment_count):
            segment = current_stitch[seg_idx]
            total_height += segment.height
            segment_grids.append(segment.grid.data)

        combined_rows = []
        for seg_idx, grid in enumerate(segment_grids):  #current simulation within one stitch
            for offset in range(len(grid)):  #row within simulation
                combined_rows.append(
                    grid[current_stitch[seg_idx].height - offset])

        combined_simulation = Simulation(self.rule_type, self.base,
                                         total_height, self.width,
                                         total_height, self.kernel)
        combined_simulation.Set_Seed(self.all_seeds[0][0])  #dummy seed
        for row_idx in range(total_height + 1):
            combined_simulation.Next_Step(combined_rows[row_idx])
        combined_simulation.Set_Cmaps(self.cmaps)

        name_parts = [
            str(stitch_index + 1), '_K', self.display_info[4], '_Dim',
            str(self.width), '_x_', str(total_height)
        ]
        result_name = ''.join(name_parts)
        if self.stitched_simulation_count == 1:
            combined_simulation.Display_Current_Figure()
        combined_simulation.Save_Current_Figure(
            self.relative_save_path + '\\' + result_name, self.save_size,
            duplicate_saves)

    #private
    # creates container to hold relevant console print information
    # and information used when saving groups of images
    def Setup_Data_Container(self):
        self.display_info_labels = []
        self.display_info = []
        self.display_info_labels.append('Stitch Count:\t')
        self.display_info_labels.append('Segment Count:\t')
        self.display_info_labels.append('Rule Type:\t')
        self.display_info_labels.append('Number Base:\t')
        self.display_info_labels.append('Kernel:\t')
        self.display_info_labels.append('Width:\t')
        self.display_info_labels.append('Height:\t')
        self.display_info.append(str(self.stitched_simulation_count))
        self.display_info.append(str(self.segment_count))
        self.display_info.append(str(self.rule_type))
        self.display_info.append(str(self.base))
        self.display_info.append(''.join(map(str, self.kernel.linear_kernel)))

    #prints all group info at beginning of run
    def Print_Info(self):
        for i in range(len(self.display_info)):
            print(self.display_info_labels[i] + str(self.display_info[i]))
        for i in range(len(self.all_seeds)):
            print('\nStitch ' + str(i + 1) + ':')

            print('\tDimensions:\t' + str(self.width) + ' x ' +
                  str(self.all_heights[i]))
            for j in range(len(self.all_seeds[i])):
                print('\tSeed ' + str(j + 1) + ':\t' +
                      str(''.join(map(str, self.all_seeds[i][j]))))

    #loads cmaps from Main
    def Load_Cmaps(self, cmaps):
        """Store ``cmaps`` on the instance as ``self.cmaps``.

        ``Stitch_Simulations`` later passes this value to ``Set_Cmaps`` on
        the combined simulation, so it must be loaded before stitching.
        """
        self.cmaps = cmaps