def collect_statistics_into_model(text_iter, lang_model):
    for line in util.counter(text_iter):
        toks = tokenize_and_clean(line, alignments=False)
        lang_model.info['big_n'] += len(toks)
        for unigram in filtered_unigrams(toks):
            lang_model.add(unigram)
        for bigram in filtered_bigrams(toks):
            lang_model.add(bigram)
        for trigram in filtered_trigrams(toks):
            lang_model.add(trigram)
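# In this snippet (and several below), util.counter(iterable) is used as a
# pass-through iterator, presumably reporting progress as it goes. A minimal
# sketch under that assumption; the real util.counter may differ:
import sys

def counter(iterable, every=1000):
    # yield items unchanged, logging a running count every `every` items
    n = 0
    for x in iterable:
        n += 1
        if n % every == 0:
            sys.stderr.write("%d items\n" % n)
        yield x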
def findHighWeightFeatures(self, label):
    featuresWeights = []
    c = util.counter()
    for i in range(28):
        for j in range(28):
            c[(i, j)] = self.weights[label][(i, j)]
    for k in range(100):
        x = c.argMax()
        featuresWeights.append(x)
        # mask out the current max so the next argMax() returns the runner-up
        c[x] = -(self.max_iterations + 1)
    return featuresWeights
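# Here util.counter() is a dict-like counter with an argMax() method (the
# interface of the Berkeley Pacman util.Counter). A minimal sketch assuming
# that interface; the lowercase class name just matches the snippet's call:
class counter(dict):
    def __missing__(self, key):
        # unseen keys count as zero
        return 0
    def argMax(self):
        # return the key with the highest value, or None if empty
        if not self:
            return None
        return max(self, key=self.get)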
def build_hist(self, features, cluster=False):
    m = condense(features)
    if cluster:
        print("Making vocab...")
        print('Clustering...')
        preds = self.kMeans.fit_predict(m)
    else:
        preds = self.kMeans.predict(m)
    length = len(features)
    hist = np.zeros([length, VOCAB_SIZE])
    c = counter()
    print('Making histogram...')
    for i in range(length):
        for _ in range(len(features[i])):
            word = preds[next(c)]
            hist[i][word] += 1
    if cluster:
        print('Vocab complete')
    return hist
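# build_hist only advances c via next(), so counter() here can simply be an
# itertools-style infinite counter, e.g.:
from itertools import count as counter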
def pack2((W, H), partialpack, packed, active, on_update):
    if not partialpack:
        on_update(active, packed, None, None, {}, "Nothing left to pack. Success.")
        return True, packed
    if not active:
        #print "No active points left"
        return False, []
    lmao = active.pop()
    activerects = [item for item in partialpack if item[1] == lmao.x]
    inactiverects = [item for item in partialpack if item[1] != lmao.x]
    c = util.counter()
    for item, x in activerects:
        if item is not None:
            c = c.incr(item)
    if (not activerects or W == lmao.x or H == lmao.y
            or any(r.covers(lmao.pos()) for r in packed)):
        return pack2((W, H), partialpack, packed, active, on_update)
    for i, (rectangle, _) in enumerate(activerects):
        if rectangle is not None:
            rect = util.Rectangle(rectangle, lmao.pos())
        else:
            rect = util.Rectangle((1, 1), lmao.pos(), is_real=False)
        on_update(active, packed, lmao, rect, c,
                  "Trying rectangle " + str(rect) + " at position " + str(lmao))
def prebaked_iter(filename):
    for line in util.counter(open(filename)):
        yield simplejson.loads(line)
d_b_var = [var for var in t_var if 'b_discriminator' in var.name]
g_var = [var for var in t_var
         if 'a2b_generator' in var.name or 'b2a_generator' in var.name]

d_a_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(d_loss_a, var_list=d_a_var)
d_b_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(d_loss_b, var_list=d_b_var)
g_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(g_loss, var_list=g_var)

''' Train '''
sess = tf.Session()
cnt, update_cnt = util.counter()

trainA_path = glob('./datasets/' + dataset + '/trainA/*.jpg')
trainB_path = glob('./datasets/' + dataset + '/trainB/*.jpg')
trainA_pool = data.ImageData(sess, trainA_path, batch_size, load_size=load_size, crop_size=crop_size)
trainB_pool = data.ImageData(sess, trainB_path, batch_size, load_size=load_size, crop_size=crop_size)
testA_path = glob('./datasets/' + dataset + '/testA/*.jpg')
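# The unpacking `cnt, update_cnt = util.counter()` above suggests util.counter
# returns a step variable together with an op that increments it. Given the
# tf.Session() context, a hypothetical TF 1.x sketch of that interface:
import tensorflow as tf

def counter(start=0):
    # non-trainable step variable; running update_cnt bumps it by one
    cnt = tf.Variable(start, dtype=tf.int64, trainable=False, name='cnt')
    update_cnt = tf.assign_add(cnt, 1)
    return cnt, update_cnt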
def fromfile(filename):
    counter = util.counter()
    with open(filename) as f:
        for r in csv.reader(f):
            counter = counter.incr((r[0], r[1]))
    return counter
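# fromfile rebinds the counter on every call, so this util.counter.incr
# apparently returns a new counter rather than mutating in place (pack2 above
# and random below use it the same way). A minimal sketch assuming that
# persistent-style interface:
class counter(dict):
    def incr(self, key, by=1):
        # return a copy with `key` bumped by `by`; self is left untouched
        out = counter(self)
        out[key] = out.get(key, 0) + by
        return out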
def quilt(i):
    # piece counts for each supported square quilt size
    pieces = {
        14: {(6,6):1, (4,4):3, (3,3):4, (5,5):3},
        15: {(8,8):1, (7,7):1, (5,5):1, (4,4):3, (3,3):3},
        16: {(7,7):1, (6,6):2, (5,5):3, (4,4):2, (3,3):3},
        17: {(9,9):1, (8,8):2, (5,5):1, (4,4):2, (3,3):1, (2,2):3},
        18: {(7,7):3, (6,6):1, (5,5):3, (4,4):4},
        19: {(7,7):3, (6,6):3, (5,5):3, (3,3):3},
        20: {(8,8):1, (7,7):3, (6,6):3, (5,5):2, (3,3):3},
        21: {(9,9):1, (8,8):2, (7,7):1, (6,6):3, (5,5):1, (4,4):3},
        22: {(12,12):1, (10,10):2, (7,7):1, (5,5):2, (4,4):1, (3,3):2, (2,2):1},
        23: {(12,12):1, (11,11):2, (7,7):1, (5,5):2, (4,4):1, (3,3):2, (2,2):2},
        24: {(10,10):2, (9,9):1, (7,7):3, (5,5):3, (4,4):4},
        25: {(9,9):3, (8,8):3, (7,7):2, (5,5):3, (3,3):1},
        26: {(10,10):1, (9,9):3, (8,8):3, (7,7):1, (5,5):3, (3,3):1},
        27: {(11,11):2, (9,9):1, (8,8):3, (7,7):3, (5,5):2, (3,3):1},
        28: {(12,12):1, (11,11):1, (9,9):2, (8,8):3, (7,7):3, (5,5):2, (3,3):1},
        29: {(15,15):1, (14,14):2, (8,8):1, (7,7):2, (5,5):1, (3,3):3},
        30: {(11,11):3, (10,10):1, (9,9):3, (8,8):2, (4,4):4},
        31: {(16,16):1, (12,12):1, (10,10):2, (9,9):2, (6,6):2, (5,5):4},
        32: {(20,20):1, (12,12):3, (8,8):1, (7,7):1, (5,5):2, (3,3):2},
        33: {(20,20):1, (13,13):2, (9,9):1, (7,7):2, (6,6):2, (5,5):2, (4,4):3},
        34: {(17,17):3, (9,9):1, (8,8):2, (5,5):1, (4,4):2, (3,3):1, (2,2):3},
        35: {(15,15):1, (13,13):1, (12,12):2, (10,10):3, (8,8):2, (7,7):1, (4,4):3},
        36: {(13,13):2, (15,15):1, (8,8):4, (9,9):1, (11,11):2, (12,12):1},
    }
    if i not in pieces:
        raise Exception("I don't know how to build that quilt.")
    return (i, i), util.counter(pieces[i])
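# Usage sketch: quilt(i) returns the square board size and a counter of
# piece dimensions, e.g.
#   (W, H), pieces = quilt(20)   # (W, H) == (20, 20); pieces[(7, 7)] == 3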
def random(e, n, (W, H)=(20, 20)):
    a = {}
    for i in range(int((1. - e) * W * H)):
        r = randint(0, n - 1)
        if r in a:
            a[r] += 1
        else:
            a[r] = 1
    c = util.counter({})
    for area in a.values():
        # split each area into a width*height pair by distributing its
        # prime factors randomly between the two sides
        r = [1, 1]
        d = 2
        while d * d <= area:
            while (area % d) == 0:
                area /= d
                r[randint(0, 1)] *= d
            d += 1
        if area > 1:
            r[randint(0, 1)] *= area
        c = c.incr((r[0], r[1]))
    return (W, H), c

def random2(e, n, (W, H)=(20, 20)):
def uniq_c(seq):
    ret = defaultdict(lambda: 0)
    for x in seq:
        ret[x] += 1
    return dict(ret)

word_df = defaultdict(int)
vocab = {}
docids = {}
files = {'pos': myopen("pos.num", 'w'), 'neg': myopen("neg.num", 'w')}
for tag in ('pos', 'neg'):
    doc_ids = myopen("%s_doc_ids" % tag).read().split()  #[:100]
    for d in util.counter(doc_ids):
        text = myopen(glob.glob("../txt_sentoken/%s/*_%s.txt" % (tag, d))[0]).read()
        text = re.sub(r'\s+', ' ', text.strip())
        words = text.encode('unicode_escape', 'replace').replace(":", "_COLON_").split()
        if d not in docids:
            docids[d] = len(docids) + 1
        for w, c in uniq_c(words).items():
            word_df[w] += 1
            if w not in vocab:
                vocab[w] = len(vocab) + 1
            print>>files[tag], docids[d], vocab[w], c

with myopen("vocab.txt", 'w') as f:
    for w in sorted(vocab, key=lambda w: vocab[w]):
        print>>f, w

with myopen("word_stats.txt", 'w') as f: