cases = list(set(map(lambda x: x[:3], case_raw.keys()))) numbers = list(set(map(lambda x: x[4:], case_raw.keys()))) else: cases = case_raw.keys() if vectors == 'Binary': # Take log base 2 to figure out how many bits we need for each human_size = int(ceil(log(len(human), 2))) # 2 dec_size = int(ceil(log(len(declensions), 2))) # 3 gen_size = int(ceil(log(len(genders), 2))) # 2 case_size = int(ceil(log(len(cases), 2))) # 3 if casenum_sep == True: num_size = int(ceil(log(len(numbers), 2))) # 1 # Now make two way dictionary with bit vectors human_dict = functions.binaryDict(human) dec_dict = functions.binaryDict(declensions) dec_dict.update(functions.invert(dec_dict)) gen_dict = functions.binaryDict(genders) gen_dict.update(functions.invert(gen_dict)) case_dict = functions.binaryDict(cases) case_dict.update(functions.invert(case_dict)) if casenum_sep == True: num_dict = functions.binaryDict(numbers) num_dict.update(functions.invert(num_dict)) # Identity vectors else: human_size = len(human) dec_size = len(declensions) gen_size = len(genders) case_size = len(cases)
return results ######## # MAIN # ######## # Read in corpus (corpus, suffixes) = objects.readCorpus(constants.corpus_file) # Determine corpus size from this corpus_size = len(corpus) # Create suffix dictionary if constants.vectors == 'binary': suffix_size = int(ceil(log(len(suffixes), 2))) # 6 suffix_dict = functions.binaryDict(suffixes) else: suffix_size = len(suffixes) suffix_dict = dict(zip(suffixes, map(tuple, identity(suffix_size)))) suf_to_tup = functions.invert(suffix_dict) suffix_dict.update(functions.invert(suffix_dict)) ########## # OUTPUT # ########## # Output layer will be list of potential suffixes, gathered from corpus output_nodes = suffix_size # Print information