def reposition_wdsneurep(infile, outfile, refpos=PATS_FILE):
    '''
    Rewrite a 'neurep' file with its entries in the order of a reference list.

    infile  -- file read with the 'neurep' reader
    outfile -- file written with the 'neurep' writer
    refpos  -- file read with the 'readl' reader; its entries give the
               output order (default: PATS_FILE)

    Every entry of ``refpos`` is looked up in the data read from ``infile``.
    '''
    reps = nw.filereader_factory('neurep', infile)
    ordered_wds = nw.filereader_factory('readl', refpos)
    # A list of (word, representation) pairs preserves the reference order.
    ordered_reps = [(wd, reps[wd]) for wd in ordered_wds]
    nw.filewriter_factory('neurep', outfile, ordered_reps)
def filize_lxcfcts():
    '''
    Write the representation of every factor listed in F_SYNFCT to its own
    'neurep' file.

    The representations come from the 'fct_neurep' file reported by
    pu.get_fn_neurep(). Each output file is SYNPATH + <factor name with the
    "/" characters removed> + '.csv' and holds a single entry.
    '''
    fn_fct_neurep = pu.get_fn_neurep('n2v2afp', 'nhf20', SYNPATH,
                                     'syneurep')['fct_neurep']
    fct_reps = nw.filereader_factory('neurep', fn_fct_neurep)
    # e.g. ['lxc/n', 'lxc/v', 'lxc/aj', 'lxc/conj', 'lxc/prep', 'lxc/pron',
    # 'lxc/adv']
    fct_names = nw.filereader_factory('readl', F_SYNFCT)
    for fct in fct_names:
        target = SYNPATH + re.sub("/", "", fct) + '.csv'
        nw.filewriter_factory('neurep', target, {fct: fct_reps[fct]})
def extract_neurep_wordlist(fn_neurep, wordlist, fout):
    '''
    Copy the representations of the words in ``wordlist`` into a new file.

    fn_neurep -- file read with the 'neurep' reader
    wordlist  -- iterable of words to keep; each one is looked up in the
                 data read from ``fn_neurep``
    fout      -- file written with the 'neurep' writer
    '''
    all_reps = nw.filereader_factory('neurep', fn_neurep)
    selected = dict((wd, all_reps[wd]) for wd in wordlist)
    nw.filewriter_factory('neurep', fout, selected)
def get_entropy_units(filename, entunit_file, actunt=False, inactst=7):
    '''
    Compute one entropy value per unit (column) of a pattern file.

    filename     -- pattern file read with the 'nowlab' reader; it is indexed
                    as a 2-D array (rows x units)
    entunit_file -- path of a dump file; one line per unit with the list of
                    that unit's states. A second file, entunit_file + '_d',
                    gets one line per unit with the {state: count} histogram
                    (written before any inactive state is removed).
    actunt       -- when true, the count of ``inactst`` is removed from the
                    histogram before the entropy is computed
    inactst      -- state value removed when ``actunt`` is true (default 7)

    Returns the list of KLdiv.shannon_entropy(histogram) values, one per unit.
    Also prints how many units ended up with an empty histogram.

    >>> ent_u_l = get_entropy_units('pattern.dat', 'entropy_units.txt')
    '''
    data = nw.filereader_factory('nowlab', filename)
    no_units = np.shape(data)[1]
    ent_u_l = []
    u_allz_no = 0
    # Both dump files are closed on every exit path, including errors.
    with open(entunit_file, 'w') as f_states:
        with open(entunit_file + '_d', 'w') as f_hist:
            for n_u in range(no_units):
                st_u = list(data[:, n_u])
                f_states.write(str(st_u) + '\n')
                # Built from set(st_u) as before so the dumped dict text
                # keeps the same key order.
                hist_st_d = dict((x, st_u.count(x)) for x in set(st_u))
                f_hist.write(str(hist_st_d) + '\n')
                if actunt and inactst in hist_st_d:
                    del hist_st_d[inactst]
                if not hist_st_d:
                    u_allz_no += 1
                # NOTE(review): shannon_entropy is still called with an empty
                # histogram for such units -- confirm it handles that input.
                ent_u_l.append(KLdiv.shannon_entropy(hist_st_d))
    # Same text as the former two-item print statement.
    print('%s %s' % ('no of units whose states are all inactive: ',
                     u_allz_no))
    return ent_u_l
def convt_base2sform(infile='fncn_normalized_sbjwd_5Nov_10m.txt',
                     outfile='fncn_sform_normalized_sbjwd_5Nov_10m.txt',
                     type_='jntpb',
                     *args,
                     **kwds):
    '''
    Rewrite the keys of a file, replacing base forms by surface forms.

    The mapping comes from blissplot.get_wbaseform_d(), whose keys are used
    as the replacement forms and whose values are the forms to be replaced.
    Each key of the input is split on whitespace; every word that occurs
    among the mapping's values is replaced by the corresponding key (the
    first one in the mapping's iteration order when several share a value).
    Words are written followed by a tab, then the value (list items joined
    by spaces, anything else via str()).

    infile  -- file read with the ``type_`` reader; *args/**kwds are
               forwarded to nw.filereader_factory
    outfile -- path of the text file to write

    >>> nws.convt_base2sform(infile='fncn_normalized_sbjwd_5Nov_10m.txt', outfile='fncn_sform_normalized_sbjwd_5Nov_10m.txt', feats_place=[0,1], cor_place=2)
    >>> nws.convt_base2sform(infile='fncvb_normalized_sbjwd_5Nov_10m.txt', outfile='fncvb_sform_normalized_sbjwd_5Nov_10m.txt', feats_place=[0,1], cor_place=2)
    '''
    root_d = blissplot.get_wbaseform_d()
    # Reverse lookup built once; setdefault keeps the first key seen for a
    # value, matching what values().index() used to select.
    # Assumes the mapping's values are hashable (words) -- TODO confirm.
    base2sform = {}
    for sform, bform in root_d.iteritems():
        base2sform.setdefault(bform, sform)

    basefile_d = nw.filereader_factory(type_, infile, *args, **kwds)
    # The handle is closed (and flushed) even if writing fails part-way.
    with open(outfile, 'w') as out:
        for k, v in basefile_d.iteritems():
            words = ''.join(base2sform.get(w, w) + '\t' for w in k.split())
            if isinstance(v, list):
                stv = ' '.join(str(i) for i in v)
            else:
                stv = str(v)
            out.write(words + stv + '\n')
def replc_w_n_jntfile(type_='jntpb_fq0',
                      fn='jntf',
                      fout='out',
                      pat_file='pats_149.txt'):
    '''
    Replace the two words of every key of a joint file by their positions
    in the pattern list.

    type_    -- reader type passed to nw.filereader_factory for ``fn``
    fn       -- input file; its keys are split into exactly two words
    fout     -- output file, written with bu.UtilDict().writetofile
    pat_file -- file read with the 'readl' reader; a word's number is its
                index in that list

    >>> nws.replc_w_n_jntfile()
    '''
    jnt_d = nw.filereader_factory(type_, fn)
    pats = nw.filereader_factory('readl', pat_file)
    numbered = {}
    for wd_pair, value in jnt_d.iteritems():
        first, second = wd_pair.split()
        no_pair = ' '.join([str(pats.index(first)), str(pats.index(second))])
        numbered[no_pair] = value
    bu.UtilDict().writetofile(numbered, fout)
def assign_colorcodes(filename='BLISS_fncwds+factorsyn.txt', cmap='jet'):
    '''
    Draw one colored marker per entry of ``filename`` in a new figure.

    The entries (read with the 'readl' reader) are placed at x = 0, 1, 2, ...
    on the line y = 0, colored by their position through ``cmap``, and used
    as rotated x tick labels.
    '''
    plt.figure()
    labels = nw.filereader_factory('readl', filename)
    count = len(labels)
    positions = range(count)
    plt.scatter(positions, [0] * count, c=positions, s=30, cmap=cmap,
                linewidth=.5)
    plt.xticks(np.arange(count), labels, rotation=45)
def get_corr_wdl(corfile, wdl):
    '''
    Append the Na and Nas values of every word pair of ``wdl`` to WDL_FILE.

    corfile -- correlation file, read twice with the 'cor' reader
               (corr_id_pirmorad=0 for Na, corr_id_pirmorad=1 for Nas)
    wdl     -- words; all 2-combinations from nw.combinations(wdl, 2) are
               reported

    A pair is looked up as 'w1 w2'; when that key is absent from the Na
    table the reversed key 'w2 w1' is used for both tables. A pair missing
    in both orders makes the lookup fail.
    '''
    cor_na_d = nw.filereader_factory('cor', corfile, corr_id_pirmorad=0)
    cor_nas_d = nw.filereader_factory('cor', corfile, corr_id_pirmorad=1)
    # 'with' guarantees the handle is closed even when a lookup fails
    # part-way through the pairs.
    with open(WDL_FILE, 'a') as f:
        f.write(corfile + '\n')
        f.write('word1 word2 Na Nas\n')
        for w1, w2 in nw.combinations(wdl, 2):
            pair = ' '.join([w1, w2])
            key = pair if pair in cor_na_d else ' '.join([w2, w1])
            na = cor_na_d[key]
            nas = cor_nas_d[key]
            f.write(pair + ' ' + str(na) + ' ' + str(nas) + '\n')
        f.write('***********\n')
def create_jntpbfile(fn_pb):
    '''
    Write a tab-separated joint-probability file for pairs of word classes
    and convert it to pattern numbers.

    For each (first file, second file) pair in
    (adjs, nouns), (nouns, verbs), (verbs, adjs), (verbs, nouns)
    one line 'word1<TAB>word2<TAB>pb' is written for every combination of a
    word of the first file with a word of the second file. ``pb`` is
    1 / len(second list), except when the first file is the verbs file,
    where it is 1 / 36.

    fn_pb -- path of the file to write; replc_w_n_jntfile() then writes
             fn_pb + '_adp2pats' from it.
    '''
    f1 = 'BLISS_adjs.txt'
    f2 = 'BLISS_nouns_sg.txt'
    f3 = 'BLISS_verbs_sg.txt'
    z = [(f1, f2), (f2, f3), (f3, f1), (f3, f2)]
    # The file is closed before replc_w_n_jntfile() re-reads it, and also
    # when an error interrupts the writing.
    with open(fn_pb, 'w') as fpb:
        for fn_first, fn_second in z:
            first_wds = nw.filereader_factory('readl', fn_first)
            second_wds = nw.filereader_factory('readl', fn_second)
            # The filename is compared here: the former test compared the
            # loaded word list with the filename, so the verb-specific
            # probability was never applied.
            if fn_first == f3:
                pb = 1. / 36
            else:
                pb = 1. / len(second_wds)
            for wd1 in first_wds:
                for wd2 in second_wds:
                    fpb.write(wd1 + '\t' + wd2 + '\t' + str(pb) + '\n')
    fout = fn_pb + '_adp2pats'
    replc_w_n_jntfile(fn=fn_pb, type_='jntpb', fout=fout)
def modify_sparsity_fwds(fn, fn_out):
    '''
    Copy a pattern file, switching 90 randomly chosen units to state 7 in
    every pattern that belongs to a word listed in 'BLISS_fncwds.txt'.

    fn     -- pattern file read with the 'nowlab' reader; row i is taken to
              belong to entry i of 'pats_149.txt'
    fn_out -- text file to write: one line per pattern, the unit states as
              space-separated integers

    Only units whose state differs from 7 are candidates for the switch.
    random.sample raises ValueError when such a pattern has fewer than 90
    candidate units.
    '''
    fwds = set(nw.filereader_factory('readl', 'BLISS_fncwds.txt'))
    pats = nw.filereader_factory('readl', 'pats_149.txt')
    data = nw.filereader_factory('nowlab', fn)
    no_flips = 90  # 135 - 45
    # Row numbers of the patterns whose word is in the word list; sets make
    # the membership tests below constant-time.
    fwd_rows = set(idx for idx, item in enumerate(pats) if item in fwds)
    # 'with' closes the output even if sampling or indexing fails.
    with open(fn_out, 'w') as fout:
        for p_no in range(len(pats)):
            p = list(data[p_no, :])
            if p_no in fwd_rows:
                idx_actv_units = [idx for idx, item in enumerate(p)
                                  if item != 7]
                for i in random.sample(idx_actv_units, no_flips):
                    p[i] = 7
            fout.write(' '.join(str(int(i)) for i in p) + '\n')
def plot_Nas_lxcatswithlxcfcts(sfx='nhf20',
                               path='result/synrep-fsynmlt.1-27Jan12/',
                               neu='syneurep'):
    '''
    Bar-plot the average Nas between one factor and every word category.

    For the factor at index 6 of the list read from F_SYNFCT, and for every
    category in WD_CATS, a correlation file is produced with
    nw.get_corr_pirmoradfeats() and its average is taken with
    get_avgNas(fncor, 1). The rounded averages are appended to the file
    'Nas' in the current directory and the bar chart is saved as
    path + 'avgNas_' + <factor> + '_' + sfx + '.png'.

    sfx  -- suffix used in the correlation and figure file names
    path -- directory prefix (string-concatenated) of all files used
    neu  -- forwarded to get_fn_lxcats()

    nws.plot_Nas_lxcatswithlxcfcts(sfx='nhf20', path='result/synrep-fsynmlt.1-27Jan12/')
    '''
    lxcats = WD_CATS
    lxcats_nm_captlz = [nm.title() for nm in lxcats]
    fn_lxcats = get_fn_lxcats(path, sfx, neu)
    # e.g. ['lxc/n', 'lxc/v', 'lxc/aj', 'lxc/conj', 'lxc/prep', 'lxc/pron',
    # 'lxc/adv']
    lxcfct1 = nw.filereader_factory('readl', F_SYNFCT)
    lxcfct = [re.sub("/", "", f) for f in lxcfct1]
    # One color per bar.
    colornames = [
        'b', 'b', 'b', 'b', [.5, .8, .9], 'b', 'b', [0.4, .6, .9]
    ]
    # 'with' closes the log file even when a correlation or plot call fails.
    with open('Nas', 'a') as h:
        h.write('\n**** ' + sfx + ' ' + path + '\n')
        for j in [6]:  # range(len(lxcfct)):
            fn2 = path + lxcfct[j] + '.csv'
            plt.figure()
            avgnas = []
            for i in range(len(lxcats)):
                fn1 = fn_lxcats[i]
                print(lxcfct[j] + ", " + lxcats[i])
                fncor = path + lxcats[i] + '_' + lxcfct[j] + sfx + '_corr.csv'
                nw.get_corr_pirmoradfeats(fn1, True, fn2, fncor)
                avgnas.append(get_avgNas(fncor, 1))
            avgnas_s = [str(int(round(av))) for av in avgnas]
            # NOTE(review): the line is labelled with the last category
            # (lxcats[i]) although it lists the averages of all categories;
            # the factor name may have been intended -- confirm before
            # changing the log format.
            h.write(lxcats[i] + ': ' + ' '.join(avgnas_s) + '\n')
            plt.bar(range(len(lxcats)), avgnas, color=colornames)
            plt.xticks(np.arange(len(lxcats)) + .4,
                       lxcats_nm_captlz,
                       fontsize=26)
            plt.yticks(range(0, 91, 10), fontsize=20)
            plt.ylabel('<Nas>', fontsize=30)
            plt.savefig(path + 'avgNas_' + lxcfct[j] + '_' + sfx + '.png')
def avgsparsity(filename):
    '''
    Return the mean fraction of units with a value above 0 over all entries
    of a 'neurep' file.

    filename -- file read with the 'neurep' reader

    For each entry the fraction is (number of values > 0) / (number of
    values); the result is np.mean of these fractions.
    '''
    reps = nw.filereader_factory('neurep', filename)
    fractions = []
    for _wd, units in reps.iteritems():
        states = np.array(units)
        positive = states[states > 0]
        fractions.append(len(positive) / float(len(units)))
    return np.mean(fractions)
def get_avgNas_elefiles(f='pattern.dat',
                        corr_id_pirmorad=1,
                        path='result/randcor-16:00-28May12/',
                        form='scatter',
                        save_fig=1,
                        new_fig=1):
    '''
    Count shared active units for every pair of patterns and plot the
    distribution of one of the counts.

    For each pair of rows of the pattern file (read with the 'nowlab'
    reader from path + f):
      Na  -- units whose state differs from 7 in both patterns
      Nas -- those of the Na units that have the same state in both
      Nad -- Na - Nas
    Mean and standard deviation of the three counts are printed.

    corr_id_pirmorad selects the plotted count: 0 -> Na, 1 -> Nas,
    2 -> Nad (any other value plots an empty list with an empty label).
    form, save_fig and new_fig are forwarded to plot_freq_dist(); when
    save_fig is true the figure is also saved as path + 'Nas_' + f + '.png'.

    Returns the (mean, std) pair returned by plot_freq_dist().
    '''
    data = nw.filereader_factory('nowlab', path + f)
    no_pats = np.shape(data)[0]
    no_units = np.shape(data)[1]
    unit_ids = range(no_units)
    rows = [list(data[r, :]) for r in range(no_pats)]

    Nas_l, Na_l, Nad_l = [], [], []
    for first in range(no_pats):
        pat_a = rows[first]
        for second in range(first + 1, no_pats):
            pat_b = rows[second]
            both_active = [u for u in unit_ids
                           if pat_a[u] != 7 and pat_b[u] != 7]
            na = len(both_active)
            nas = len([u for u in both_active if pat_a[u] == pat_b[u]])
            Nas_l.append(nas)
            Nad_l.append(na - nas)
            Na_l.append(na)

    print('avg(Nas):' + str(np.mean(Nas_l)))
    print('std(Nas):' + str(np.std(Nas_l)))
    print('avg(Nad):' + str(np.mean(Nad_l)))
    print('std(Nad):' + str(np.std(Nad_l)))
    print('avg(Na):' + str(np.mean(Na_l)))
    print('std(Na):' + str(np.std(Na_l)))

    # Pick the list to plot and its axis label.
    sfx = ''
    dist_l = []
    for code, values, label in ((0, Na_l, 'Na'), (1, Nas_l, 'Nas'),
                                (2, Nad_l, 'Nad')):
        if corr_id_pirmorad == code:
            dist_l, sfx = values, label
            break

    mean, std = plot_freq_dist(dist_l,
                               form=form,
                               xlab=sfx,
                               save_fig=save_fig,
                               new_fig=new_fig)
    if save_fig:
        plt.savefig(path + 'Nas_' + f + '.png')
    return mean, std