# NOTE(review): this line sits at column 0 while the statements below are
# indented; the enclosing loop headers are outside this view - confirm the
# intended indentation against the full file.
print 'cz = ', cluster_sz
						# calculate language vectors for this cluster size and collect them in total_vec
						lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages)
						total_vec.append(lang_vectors)
						# choose the ordered/unordered label that the summary print below appends
						if ordered == 0:
								ord_str = 'unordered!'
						else:
								ord_str = 'ordered!'

				# calculate total vector: the sum of everything collected in total_vec
				final_lang = sum(total_vec)

				# calculate variance of cos angle distribution
				cosangles = utils.cosangles(final_lang, languages)
				vary = utils.var_measure(cosangles)
				# record the value in the grid V; i and j come from code outside this view
				# (presumably the indices of the enclosing N / k loops - confirm)
				V[i,j] = vary
				print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sizes) + ', ' + ord_str + '\n'
				print "variance of cosine values: " + str(vary)
				print '=========='

# Persist the variance grid V together with the parameter axes (Ns, sparsities,
# ks) so the plot can be regenerated without recomputing the sweep.
np.savez('./vars/vars_dump.npz',V=V, Ns=Ns, sparsities=sparsities,ks=ks)

# plot results: filled contours of V with log10(k) on the x axis and N on the y axis
#CS = plt.contourf(sparsities,Ns,V, alpha=0.7, cmap=plt.cm.jet)
CS = plt.contourf(np.log10(ks),Ns,V, alpha=0.7, cmap=plt.cm.jet)
CB = plt.colorbar(CS, shrink=0.8, extend='both')
plt.xlabel('log(k)')
plt.ylabel('N')
plt.title('Variance of Cosine Angles Between Vectors')
# savefig's option for cropping to the drawn content is `bbox_inches`;
# `bbox` is not a savefig keyword, so the tight crop was never applied.
plt.savefig('./plots/Nk_contours-ridiculous.png',bbox_inches='tight')
					# keep this run's language vectors (lang_vectors is assigned above this view)
					total_vectors.append(lang_vectors)

					# calculate unknown vector from unknown_txt with the same N / cluster size / ordering
					unknown_vector = random_idx.generate_RI_text(N, RI_letters, cluster_sz, ordered,unknown_txt)
					unknown_tots.append(unknown_vector)

					# print the settings of this run, then the variance measure of its cosine angles
					print '=========='
					if ordered == 0:
							ord_str = 'unordered!'
					else:
							ord_str = 'ordered!'

					print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n'
					cosangles = utils.cosangles(lang_vectors, languages)
					variance = utils.var_measure(cosangles)
					# collect the per-run variance in varys
					varys.append(variance)
					# NOTE(review): var_measure is evaluated a second time here instead of reusing `variance`
					print "variance of language values: " + str(utils.var_measure(cosangles))

########################
'''
# history vectors
lang_vectors = random_idx.generate_RI_lang_history(N, RI_letters, languages=languages)
total_vectors.append(lang_vectors)
unknown_vector = random_idx.generate_RI_text_history(N, RI_letters, unknown_txt)
unknown_tots.append(unknown_vector)

print "~~~~~~~~~~"
print "history vector information"
cosangles = utils.cosangles(lang_vectors, languages)
variance = utils.var_measure(cosangles)
示例#3
0
				# For each cluster size: build the language vectors, then walk the test files.
				# NOTE(review): this fragment ends inside the test-file loop; the rest of the
				# loop body is outside this view.
				for cluster_sz in cluster_sizes:
							
							##print "~~~~~~~~~~"
							# start a fresh list for this cluster size
							total_vectors = []
							# calculate language vectors
							lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages)
							#lang_vectors = random_idx.generate_RI_lang_words(N, RI_letters, languages=languages)
							total_vectors.append(lang_vectors)

							# print cosine angles 
							#print '=========='
							

						#	print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n'
							# cosangles / variance are still computed, but the prints that reported them are commented out
							cosangles = utils.cosangles(lang_vectors, languages)
							variance = utils.var_measure(cosangles)
							#print "variance of language values: " + str(utils.var_measure(cosangles))
							# total_vectors holds only the single lang_vectors entry appended above
							final_lang = sum(total_vectors)

							###############################
							# iterate through test files and calculate correctness
							# every path ending in 'txt' under main_base + test_dir is a test file;
							# `correct` is the tally, starting at zero
							test_fn = glob.glob(main_base + test_dir + '/*txt')
							total = len(test_fn)
							correct = 0

							for i in trange(total):
									testf = test_fn[i]
									# the expected language is the \w+ run right before '_<digits>_p.txt' in the path
									# NOTE(review): the pattern is not a raw string and its '.' is unescaped (matches any character)
									actual_lang = re.findall('(\w+)_\d+_p.txt$', testf)[0]
									# reset the list of unknown-text vectors for this file
									unknown_tots = []
									#print len(testf),testf[91:93]
								#if testf == main_base + test_dir + '/da_432_p.txt':
				# For each cluster size: build the language vectors, then classify sentences
				# typed on stdin against them.
				for cluster_sz in cluster_sizes:
							
							##print "~~~~~~~~~~"
							# start a fresh list for this cluster size
							total_vectors = []
							# calculate language vectors
							lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages)
							#lang_vectors = random_idx.generate_RI_lang_words(N, RI_letters, languages=languages)
							total_vectors.append(lang_vectors)

							# print cosine angles 
							#print '=========='
							

						#	print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n'
							# cosangles / variance are still computed, but the prints that reported them are commented out
							cosangles = utils.cosangles(lang_vectors, languages)
							variance = utils.var_measure(cosangles)
							#print "variance of language values: " + str(utils.var_measure(cosangles))
							# total_vectors holds only the single lang_vectors entry appended above
							final_lang = sum(total_vectors)

							###############################
							# interactive mode: read one sentence per line from stdin and report its likely language
							print "Now waiting for input"
							# NOTE(review): the loop has no exit condition, so later cluster sizes are
							# never reached; end of input surfaces as EOFError from raw_input()
							while True:
								# decode the raw bytes using the stdin encoding, falling back to the locale's preferred one
								sentence = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))
								unknown_tots = []
								# cleaner() is defined outside this view
								sentence = cleaner(sentence)
								unknown_vector = random_idx.generate_RI_sentence(N, RI_letters, cluster_sz, ordered,sentence)
								unknown_tots.append(unknown_vector)
								# unknown_tots holds only the vector appended just above
								final_unknown = sum(unknown_tots)
								likely_lang = utils.find_language(sentence, final_unknown, final_lang, languages,display=0)
								print "likely language:", likely_lang
示例#5
0
                # start a fresh list for this run (the enclosing loop header is outside this view)
                total_vectors = []
                # calculate language vectors
                lang_vectors = random_idx.generate_RI_lang(N,
                                                           RI_letters,
                                                           cluster_sz,
                                                           ordered,
                                                           languages=languages)
                #lang_vectors = random_idx.generate_RI_lang_words(N, RI_letters, languages=languages)
                total_vectors.append(lang_vectors)

                # print cosine angles
                #print '=========='

                #	print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n'
                # cosangles / variance are still computed, but the prints that reported them are commented out
                cosangles = utils.cosangles(lang_vectors, languages)
                variance = utils.var_measure(cosangles)
                #print "variance of language values: " + str(utils.var_measure(cosangles))
                # total_vectors holds only the single lang_vectors entry appended above
                final_lang = sum(total_vectors)

                ###############################
                # interactive mode: read one sentence per line from stdin
                print "Now waiting for input"
                # NOTE(review): no exit condition is visible; end of input surfaces as
                # EOFError from raw_input(). The loop body continues past this view.
                while True:
                    # decode the raw bytes using the stdin encoding, falling back to the locale's preferred one
                    sentence = raw_input().decode(
                        sys.stdin.encoding
                        or locale.getpreferredencoding(True))
                    unknown_tots = []
                    # cleaner() is defined outside this view
                    sentence = cleaner(sentence)
                    unknown_vector = random_idx.generate_RI_sentence(
                        N, RI_letters, cluster_sz, ordered, sentence)
                    unknown_tots.append(unknown_vector)