def get_summary_of_dataset(filepath, skip_header=False, has_timestamp=True):
    """Scan a record file once and collect basic statistics."""
    reader = Reader(filepath, skip_header=skip_header)
    users = set()
    items = set()
    positive = 0
    negative = 0
    begin_time = time.time()  # upper bound, shrinks to the earliest timestamp seen
    end_time = 0
    hours = {}  # histogram of records per hour of day
    for record in reader:
        users.add(record[0])
        items.add(record[1])
        result = record[2]
        if result > 0:
            positive += 1
        elif result < 0:
            negative += 1
        if not has_timestamp:
            continue
        timestamp = record[3]
        if begin_time > timestamp:
            begin_time = timestamp
        if end_time < timestamp:
            end_time = timestamp
        hour = timestamp2hour(timestamp)
        hours[hour] = hours.get(hour, 0) + 1
    # Return only the summary values instead of the whole local namespace.
    return {
        'users': users,
        'items': items,
        'positive': positive,
        'negative': negative,
        'begin_time': begin_time,
        'end_time': end_time,
        'hours': hours,
    }
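# Minimal usage sketch for get_summary_of_dataset (not part of the original
# module). It assumes the (user, item, result, timestamp) record layout read
# above; the path below is only a placeholder.
summary = get_summary_of_dataset('data/records.txt')
print("users: %d, items: %d" % (len(summary['users']), len(summary['items'])))
print("positive: %d, negative: %d" % (summary['positive'], summary['negative']))
print("records per hour of day: %s" % summary['hours'])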
def get_summary_of_user_profile(filepath, skip_header=False):
    """Build histograms of age, gender, tweet count and tag count over all users."""

    def get_fields(line):
        record = line.split(",")
        try:
            # birth year -> approximate age, using 2012 as the reference year
            age = 2012 - int(record[1])
        except (ValueError, IndexError):
            age = 0
        user = int(record[0])
        gender = int(record[2])
        num_of_tweet = int(record[3])
        tags = [int(tag) for tag in record[4].strip().split(";")]
        return [user, age, gender, num_of_tweet, tags]

    reader = Reader(filepath, skip_header=skip_header, get_fields=get_fields)
    ages = {}
    genders = {}
    ntweet = {}
    ntags = {}
    for user, age, gender, num_of_tweet, tags in reader:
        num_of_tags = len(tags)
        ages[age] = ages.get(age, 0) + 1
        genders[gender] = genders.get(gender, 0) + 1
        ntweet[num_of_tweet] = ntweet.get(num_of_tweet, 0) + 1
        ntags[num_of_tags] = ntags.get(num_of_tags, 0) + 1
    return {
        'age': ages,
        'gender': genders,
        'tweet': ntweet,
        'tags': ntags,
    }
def __init__(self, settings=Settings(), reader=Reader()):
    self.settings = settings
    self.reader = reader
    self.train_data_loaded = False
    if self.settings.epe:
        self.best_epe = self.settings.epe
    else:
        self.best_epe = 100.0
    print("Dataset Path:", self.settings.dataset)
def mean_average_precision(submission_path, solution_path):
    """Compute mAP@3, grouped by the type field of the solution file."""
    submission_data = Reader(submission_path, lambda line: line.strip().split(","))
    solution_data = Reader(solution_path, lambda line: line.strip().split(","))
    map3s = {}
    ap_sum = 0
    user_cnt = 0
    current_type = None
    try:
        while True:
            _, items1 = submission_data.next()
            _, items2, ptype = solution_data.next()
            if ptype != current_type:
                # Flush the accumulated statistics of the previous type.
                if current_type is not None:
                    map3s[current_type] = {
                        "ap_sum": ap_sum,
                        "user_cnt": user_cnt,
                        "mAP@3": ap_sum / user_cnt,
                    }
                ap_sum = 0
                user_cnt = 0
                current_type = ptype
            user_cnt += 1
            items2 = items2.split()
            if len(items2) == 0:
                continue
            items1 = items1.split()
            if len(items1) == 0:
                continue
            ap = 0.0
            cnt = 0.0
            for i in xrange(min(3, len(items1))):
                # Count each recommended item at most once.
                if items1[i] in items2 and items1[i] not in items1[:i]:
                    cnt += 1
                    ap += cnt / (i + 1)
            n = min(3, len(items2))
            ap_sum += ap / n
    except StopIteration:
        pass
    map3s[current_type] = {
        "ap_sum": ap_sum,
        "user_cnt": user_cnt,
        "mAP@3": ap_sum / user_cnt,
    }
    return map3s
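# Worked toy example of the AP@3 logic above (hypothetical data, not taken
# from any real submission): predictions "B A C" against ground truth "A C".
#   i=0: "B" is not relevant              -> no contribution
#   i=1: "A" is relevant, 1st hit         -> ap += 1/2
#   i=2: "C" is relevant, 2nd hit         -> ap += 2/3
# n = min(3, len(truth)) = 2, so AP@3 = (1/2 + 2/3) / 2 ~= 0.583.
# mAP@3 is this value averaged over all users of a given type.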
def predict(train_path, test_path, outpath):
    test = Reader(test_path, skip_header=False)
    model = LFM(train_path).do_train()
    print "predict and write result...", t()
    with open(outpath, 'wb') as fp:
        for user, item, _ in test:
            r = model.predict(user, item)
            line = '\t'.join(map(str, [user, item, r])) + '\n'
            fp.write(line)
    t()
def gen_validation_set(inpath, train_path, validation_path, skip_header=False):
    reader = Reader(inpath, skip_header=skip_header)
    data_dir = os.path.dirname(inpath)
    train_set = open(os.path.join(data_dir, train_path), 'wb')
    validation_set = open(os.path.join(data_dir, validation_path), 'wb')
    for record in reader:
        timestamp = record[-1]
        line = ','.join(map(str, record)) + '\n'
        # The last 7 days (timestamps from 1320336000 on,
        # i.e. 2011-11-03 16:00 UTC) are used as the validation set.
        if timestamp < 1320336000:
            train_set.write(line)
        else:
            validation_set.write(line)
    train_set.close()
    validation_set.close()
def to_submission_format(predicted_path, outpath):
    def get_fields(line):
        return [
            float(field) if '.' in field else int(field)
            for field in line.split(',')
        ]

    data = Reader(predicted_path, get_fields)
    print "convert predict result to dict...", t()
    public = {}
    private = {}
    for user, item, r, timestamp in data:
        if timestamp < 1321891200:
            tmp = public
        else:
            tmp = private
        tmp.setdefault(user, []).append((r, item))
    t()

    def remove_duplicate(l):
        has = set()
        return [x for x in l if not (x in has or has.add(x))]

    print "convert to submission format...", t()
    with open(outpath, 'wb') as fp:
        def write_to_file(d):
            for user in sorted(d.keys()):
                items = remove_duplicate(
                    [item for _, item in sorted(d[user], reverse=True)])[:3]
                fp.write("%s,%s\n" % (user, " ".join(map(str, items))))

        write_to_file(public)
        write_to_file(private)
    t()
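# Small illustration of the top-3 selection above (toy numbers, not real
# predictions): items are ranked by descending predicted score, duplicates are
# dropped while preserving order, and the first three remain.
scored = [(0.9, 101), (0.7, 205), (0.9, 101), (0.4, 307), (0.6, 205)]
ranked = [item for _, item in sorted(scored, reverse=True)]  # [101, 101, 205, 205, 307]
# remove_duplicate(ranked)[:3] would then yield [101, 205, 307].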
class LFM(object):
    """Latent factor model (biased matrix factorization) trained with SGD."""

    def __init__(self, datapath):
        self.data = Reader(datapath, skip_header=False)
        self.get_data = lambda: self.data.sample(SAMPLES_NUMBER)
        self.b_u = {}  # user biases
        self.b_i = {}  # item biases
        self.q_i = {}  # item factor vectors
        self.p_u = {}  # user factor vectors
        self.avg_bu = 0
        self.avg_bi = 0

    def do_train(self):
        eta = ETA
        print "init LFM...", t()
        b_u, b_i, q_i, p_u = self.init_LFM()
        t()
        i = 1
        while i < 1 + TRAIN_REPEAT and not received_exit_signal():
            t()
            cnt = 0
            average_e = 0
            for record in self.get_data():
                user, item, result = record[:3]
                # map result from {-1, 1} to {0, 1}
                e = ((result + 1) >> 1) - self.predict(user, item)
                average_e += e * e
                cnt += 1
                # SGD updates with L2 regularization
                b_u[user] += eta * (e - LAMBDA * b_u[user])
                b_i[item] += eta * (e - LAMBDA * b_i[item])
                for k in xrange(DIMENSION):
                    p = p_u[user][k]
                    q = q_i[item][k]
                    q_i[item][k] += eta * (e * p - LAMBDA * q)
                    p_u[user][k] += eta * (e * q - LAMBDA * p)
            average_e /= cnt
            print("%dth training used %.1fs\terror = %f"
                  % (i, t(False), average_e))
            i += 1
        self.update_average_args()
        return self

    def init_LFM(self):
        self.u = 0.0
        self.total = 0
        for record in self.data.get_all():
            user, item, result = record[:3]
            self.u += result
            self.total += 1
            if user not in self.b_u:
                self.b_u[user] = 0
                self.p_u.setdefault(user, self.random_qp())
            if item not in self.b_i:
                self.b_i[item] = 0
                self.q_i.setdefault(item, self.random_qp())
        self.u /= self.total
        return [self.b_u, self.b_i, self.q_i, self.p_u]

    def random_qp(self):
        return [random.random() / SQRT_DIMENSION for _ in xrange(DIMENSION)]

    def predict(self, user, item):
        qp = self.compute_qp(user, item)
        predict_r = self.get_b_ui(user, item) + qp
        return predict_r

    def compute_qp(self, u, i):
        self.q_i.setdefault(i, self.random_qp())
        self.p_u.setdefault(u, self.random_qp())
        return sum(self.q_i[i][k] * self.p_u[u][k] for k in xrange(DIMENSION))

    def get_b_ui(self, u, i):
        # Fall back to the average bias for unseen users/items.
        b_i = self.b_i.get(i, self.avg_bi)
        b_u = self.b_u.get(u, self.avg_bu)
        return self.u + b_i + b_u

    def update_average_args(self):
        bi = self.b_i.values()
        bu = self.b_u.values()
        self.avg_bi = sum(bi) / float(len(bi))
        self.avg_bu = sum(bu) / float(len(bu))
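# Hedged usage sketch for the LFM class above. As implemented, the model predicts
#     r_hat(u, i) = self.u + b_u[u] + b_i[i] + sum_k q_i[i][k] * p_u[u][k]
# and do_train() fits the biases and factors by SGD on sampled records.
# The constants (ETA, LAMBDA, DIMENSION, SAMPLES_NUMBER, TRAIN_REPEAT) and the
# Reader are assumed to come from this module; the path, user id and item id
# below are only placeholders.
model = LFM('data/records.txt').do_train()
score = model.predict(12345, 678)  # roughly in [0, 1], since targets are mapped to {0, 1}
print(score)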
import numpy as np
from data_reader import Reader
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
import matplotlib.pyplot as plt
from MLP import MLP

x_train, x_test, y_train, y_test = Reader('data/Iris Data.txt').load_train_data()

# Encode the string labels as integers, then one-hot encode them.
# Fit the encoder on the training labels only and reuse it for the test labels.
encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train, encoded_y_test = encoder.transform(y_train), encoder.transform(y_test)
y_train, y_test = np_utils.to_categorical(encoded_y_train), np_utils.to_categorical(encoded_y_test)

# Mean normalization: subtract the training mean and divide by the training range,
# feature by feature; the test set reuses the training-set statistics.
mean1, mean2, mean3, mean4 = np.mean(x_train[:, 0]), np.mean(x_train[:, 1]), \
    np.mean(x_train[:, 2]), np.mean(x_train[:, 3])
min1, max1 = x_train[:, 0].min(), x_train[:, 0].max()
min2, max2 = x_train[:, 1].min(), x_train[:, 1].max()
min3, max3 = x_train[:, 2].min(), x_train[:, 2].max()
min4, max4 = x_train[:, 3].min(), x_train[:, 3].max()
x_train[:, 0] = (x_train[:, 0] - mean1) / (max1 - min1)
x_train[:, 1] = (x_train[:, 1] - mean2) / (max2 - min2)
x_train[:, 2] = (x_train[:, 2] - mean3) / (max3 - min3)
x_train[:, 3] = (x_train[:, 3] - mean4) / (max4 - min4)
x_test[:, 0] = (x_test[:, 0] - mean1) / (max1 - min1)
x_test[:, 1] = (x_test[:, 1] - mean2) / (max2 - min2)
x_test[:, 2] = (x_test[:, 2] - mean3) / (max3 - min3)
x_test[:, 3] = (x_test[:, 3] - mean4) / (max4 - min4)
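# For reference only: an equivalent, vectorized form of the per-column
# normalization above (a sketch, not part of the original script; do not run it
# after the in-place normalization, since the columns are already scaled).
feat_mean = x_train.mean(axis=0)
feat_range = x_train.max(axis=0) - x_train.min(axis=0)
x_train_norm = (x_train - feat_mean) / feat_range  # broadcasts over all 4 features
x_test_norm = (x_test - feat_mean) / feat_range    # test set reuses training statistics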
from data_reader import Reader

if not os.path.isfile(weight_path):
    raise IOError("Error: Pre-trained model doesn't exist!")
if not os.path.isfile(src_data_path) or not os.path.isfile(tgt_data_path):
    raise IOError("Error: Data doesn't exist!")
if not os.path.isdir(train_method_city_dir):
    os.makedirs(train_method_city_dir)
if not os.path.isdir('./logfiles/'):
    os.mkdir('./logfiles/')

assert iter_size > 1, 'iter_size should be larger than 1!'
assert city in ['Taipei', 'Roma', 'Tokyo', 'Rio', 'Denmark', 'syn2real'], 'Please check the city name!'
assert method in ['GA', 'GACA'], 'Please check the method name!'

reader = Reader(src_data_path, tgt_data_path,
                input_width=input_width, input_height=input_height,
                batch_size=batch_size)
model = FCN8VGG(weight_path)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# create additional class-specific loss layer
if city in ['Taipei', 'Roma', 'Tokyo', 'Rio', 'Denmark']:
    weak_loss = WeakLoss('Cityscapes')  # create object and assign src dataset
else:
    weak_loss = WeakLoss('Synthia')


def cal_grad_func_impl(x, grad):
    return weak_loss.diff * grad  # grad = 1.0 in lossLayer


def py_func(func, inp, Tout, stateful=True, name=None, grad_func=None):
    grad_name = 'PyFuncGrad_' + str(np.random.randint(0, 1e+8))
    tf.RegisterGradient(grad_name)(grad_func)
from data_reader import Reader

# Initialize reader
reader = Reader()

# Read CSV data
reader.csv('data.csv')

# Format to X and y columns
X, y = reader.X_y_split(reader.data, 4)
X_train, X_test, y_train, y_test = reader.split_train_test(X, y)
def _set_up_train_net_multigpu(self):
    with tf.device("/cpu:0"):
        # learning rate decay
        with tf.name_scope("lr_decay"):
            if LR_POLICY == 'staircase':
                lr_breakpoints = [int(lbp) for lbp in LR_BREAKPOINT]
                lr_decays = [float(ld) for ld in LR_DECAY]
                assert len(lr_breakpoints) == len(lr_decays)
                pred_fn_pairs = []
                for lr_decay, lr_breakpoint in zip(lr_decays, lr_breakpoints):
                    fn = (lambda o: lambda: tf.constant(o, tf.float32))(lr_decay)
                    pred_fn_pairs.append((tf.less(self.global_step, lr_breakpoint), fn))
                lr_decay = tf.case(pred_fn_pairs, default=(lambda: tf.constant(1.0)))
            else:
                logging.error("Unknown lr_policy: {}".format(LR_POLICY))
                sys.exit(1)

            self.current_lr = lr_decay * BASE_LR
            tf.summary.scalar('lr', self.current_lr, collections=["brief"])

        # input data
        with tf.name_scope("input_data"):
            batch_size = BATCH_SIZE
            train_data_list = os.path.join(DATA_DIR, DATA_NAME)
            train_reader = Reader(train_data_list, is_training=True)
            train_batch = train_reader.dequeue(batch_size)
            sub_batch_size = int(batch_size / N_GPUs)
            logging.info('Batch size is {} on each of the {} GPUs'.format(
                sub_batch_size, N_GPUs))
            # split the batch evenly across the GPUs
            sub_batches = []
            for i in range(N_GPUs):
                sub_batch = {}
                for k, v in train_batch.items():
                    sub_batch[k] = v[i * sub_batch_size:(i + 1) * sub_batch_size]
                sub_batches.append(sub_batch)

        if OPTIMIZER == 'sgd':
            optimizer = tf.train.MomentumOptimizer(self.current_lr, 0.9)
            logging.info('Using SGD optimizer. Momentum={}'.format(0.9))
        elif OPTIMIZER == 'adam':
            optimizer = tf.train.AdamOptimizer(self.current_lr)
            logging.info('Using ADAM optimizer.')
        elif OPTIMIZER == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(self.current_lr)
            logging.info('Using RMSProp optimizer.')
        else:
            logging.critical('Unsupported optimizer {}'.format(OPTIMIZER))
            sys.exit(1)

        # build one tower (loss + gradients) per GPU, sharing variables
        tower_grads = []
        tower_losses = []
        for i in range(N_GPUs):
            logging.info("Setting up tower %d" % i)
            with tf.device("/gpu:%d" % i):
                with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                    with tf.name_scope("tower_%d" % i):
                        loss = self._tower_loss(sub_batches[i], LOSS_TYPE)
                        grads = optimizer.compute_gradients(loss)
                        tower_grads.append(grads)
                        tower_losses.append(loss)

        self.loss = tf.add_n(tower_losses)
        tf.summary.scalar("total_loss", self.loss, collections=['brief'])

        with tf.name_scope("average_loss"):
            grads = self._average_gradients(tower_grads)

        with tf.variable_scope("optimizer"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(
                    grads, global_step=self.global_step)

        for var in tf.global_variables():
            summary_name = 'parameters/' + var.name.split(':')[0]
            tf.summary.histogram(summary_name, var, collections=['detailed'])

        self.brief_summary_op = tf.summary.merge_all(key='brief')
        self.detailed_summary_op = tf.summary.merge_all(key="detailed")
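# The _average_gradients helper used above is not shown in this excerpt, so the
# following is only a sketch of the usual multi-tower pattern it is assumed to
# follow: average each variable's gradient across towers and keep the variable
# reference from the first tower. It assumes every variable receives a gradient.
import tensorflow as tf


def _average_gradients_sketch(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per GPU tower.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        var = grad_and_vars[0][1]  # variables are shared, take the first tower's handle
        average_grads.append((grad, var))
    return average_grads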
    # tail of the predict() routine: count correct predictions and return the
    # accuracy (in percent) together with the raw outputs
    for i in range(len(y)):
        if y[i] == output[i]:
            correct = correct + 1
    return (float(correct) / float(len(y))) * 100, output


# main
# Get the features to draw from the user
a = int(input("Please Enter the first feature you want to train the data on: "))
b = int(input("Please Enter the second feature you want to train the data on: "))

# Get the two classes from the user
class1 = int(input("Please Enter a number from 1 to 3: "))
class2 = int(input("Please Enter another number from 1 to 3: "))

x_train, x_test, y_train, y_test = Reader('data/Iris Data.txt').load_train_data(a, b, class1, class2)

# Mean normalization for data
mean1, mean2 = np.mean(x_train[:, 0]), np.mean(x_train[:, 1])
min1, max1 = x_train[:, 0].min(), x_train[:, 0].max()
min2, max2 = x_train[:, 1].min(), x_train[:, 1].max()
x_train[:, 0], x_train[:, 1] = (x_train[:, 0] - mean1) / (max1 - min1), (x_train[:, 1] - mean2) / (max2 - min2)
x_test[:, 0], x_test[:, 1] = (x_test[:, 0] - mean1) / (max1 - min1), (x_test[:, 1] - mean2) / (max2 - min2)

# Build the model
slp = Perceptron()
slp.fit(x_train, y_train, learning_rate=0.02, epochs=10)
acc, predicted = slp.predict(x_test, y_test)
print(acc)
settings.filters = args.filter
settings.kl = args.kernelsize
settings.s = args.strides
settings.batch_norm = args.batchnorm
settings.depth = args.depth
settings.powerDepth = args.powerDepth

# Collect the left/right/depth image directories for every dataset folder.
ldir = []
rdir = []
ddir = []
for d in range(args.datanum):
    pathd = os.path.join(settings.dataset, str(d))
    ldir = np.append(ldir, os.path.join(pathd, "left"))
    rdir = np.append(rdir, os.path.join(pathd, "right"))
    ddir = np.append(ddir, os.path.join(pathd, "depth"))

print("The number of images: %i" % args.imgnum)
reader = Reader(ldir=ldir, rdir=rdir, ddir=ddir)
predictor = Predictor(settings=settings, reader=reader)

# each epoch has 1000 images
maxEpochs = 500
numPerIter = 100
Iter = 0
for epoch in range(0, maxEpochs):
    img_n = list(range(0, args.imgnum))
    while len(img_n) > 0:
        print(len(img_n))
        if len(img_n) > numPerIter:
            # pick numPerIter random images from the dataset
            img_subset = random.sample(img_n, numPerIter)
            print("Initialized input data")
            reader.re_inti(list=img_subset)