def gbootstrap(ginput, goutput, fnames): print "Bootstrapping", ginput.getName(), "with", if fnames == []: print "basic elements" else: for fname in fnames: print "\""+fname+"\"", print "" # fixed parameters spectral_radius = 0.9 # Sequences seqs = [] for fname in fnames: x = aux.load_words_from_text(fname, ginput) seqs.extend(x) if fnames == []: seqs = list(goutput.alphabet) # to avoid using a reference! seqs.remove(goutput.stop_symbol) #print seqs #print "words #", len(words) train_set = seqs #test_set = ??? to_train = [] #to_test = [] for seq in train_set: if "SeqWord" in ginput.getName(): syls = ginput.getSyllables(seq) for j in range(len(syls)-1): to_train.append(list("".join(syls[0:j]))) to_train.extend(list(syls)) to_train.append(list(seq) + [ginput.stop_symbol]) else: to_train.append([seq , ginput.stop_symbol]) wvectorizer = WVectorizer.WVectorizer(ginput.getSize(), ginput.getInternalSize(), spectral_radius) #to_train = list(set(to_train)) #print to_train if "" in to_train: to_train.remove("") if [] in to_train: to_train.remove([]) #print to_train to_train = map(lambda x: (x, wvectorizer.wvectorize(ginput.input(x))), to_train) #to_test = map(lambda x: (x, wvectorizer.wvectorize(winput.input(x))), to_test) #print to_train wunvectorizer = WunVectorizer.WunVectorizer(spectral_radius, goutput) wunvectorizer.train(to_train, [], True) #print "" return wvectorizer, wunvectorizer
def gbootstrap(ginput, goutput, fnames): print "Bootstrapping", ginput.getName(), "with", if fnames == []: print "basic elements" else: for fname in fnames: print "\"" + fname + "\"", print "" # fixed parameters spectral_radius = 0.9 # Sequences seqs = [] for fname in fnames: x = aux.load_words_from_text(fname, ginput) seqs.extend(x) if fnames == []: seqs = list(goutput.alphabet) # to avoid using a reference! seqs.remove(goutput.stop_symbol) #print seqs #print "words #", len(words) train_set = seqs #test_set = ??? to_train = [] #to_test = [] for seq in train_set: if "SeqWord" in ginput.getName(): syls = ginput.getSyllables(seq) for j in range(len(syls) - 1): to_train.append(list("".join(syls[0:j]))) to_train.extend(list(syls)) to_train.append(list(seq) + [ginput.stop_symbol]) else: to_train.append([seq, ginput.stop_symbol]) wvectorizer = WVectorizer.WVectorizer(ginput.getSize(), ginput.getInternalSize(), spectral_radius) #to_train = list(set(to_train)) #print to_train if "" in to_train: to_train.remove("") if [] in to_train: to_train.remove([]) #print to_train to_train = map(lambda x: (x, wvectorizer.wvectorize(ginput.input(x))), to_train) #to_test = map(lambda x: (x, wvectorizer.wvectorize(winput.input(x))), to_test) #print to_train wunvectorizer = WunVectorizer.WunVectorizer(spectral_radius, goutput) wunvectorizer.train(to_train, [], True) #print "" return wvectorizer, wunvectorizer
Copyright 2010, 2011, 2012 by neuromancer """ import src.io.text.English as English import src.core.Mind as Mind import src.Stats as Stats from src.aux import load_words_from_text # mind initialization winput = English.InputWords() woutput = English.OutputWords() mind = Mind.Mind([winput], [woutput], ["data/pos/categories.txt"]) cats_files = load_words_from_text("data/pos/categories.longer.txt", winput) cats = load_words_from_text("data/pos/categories.txt", winput) stats = Stats.Stats(mind) # mind training for (cat, cat_file) in zip(cats, cats_files): print "Assimilating", cat_file + "s.." words = load_words_from_text("data/pos/" + cat_file + "s_train.txt", winput) cat_inputs = dict(SeqWordsEn=list(cat)) for word in words:
Copyright 2010, 2011, 2012 by neuromancer """ import src.io.text.English as English import src.core.Mind as Mind import src.Stats as Stats from src.aux import load_words_from_text # mind initialization winput = English.InputWords() woutput = English.OutputWords() mind = Mind.Mind([winput], [woutput], ["data/pos/categories.txt"]) cats_files = load_words_from_text("data/pos/categories.longer.txt", winput) cats = load_words_from_text("data/pos/categories.txt", winput) stats = Stats.Stats(mind) # mind training for (cat,cat_file) in zip(cats,cats_files): print "Assimilating", cat_file+"s.." words = load_words_from_text("data/pos/"+cat_file+"s_train.txt", winput) cat_inputs = dict( SeqWordsEn = list(cat)) for word in words: word_inputs = dict( SeqWordsEn = list(word))