def __init__(self, datapath):
    self.data = Reader(datapath, skip_header=False)
    self.get_data = lambda: self.data.sample(SAMPLES_NUMBER)
    self.b_u = {}
    self.b_i = {}
    self.q_i = {}
    self.p_u = {}
    self.avg_bu = 0
    self.avg_bi = 0
Example #2
def get_summary_of_dataset(filepath, skip_header=False, has_timestamp=True):
    reader = Reader(filepath, skip_header=skip_header)

    users = set()
    items = set()
    positive = 0
    negative = 0
    # start from "now" so the first timestamp seen becomes the minimum
    begin_time = time.time()
    end_time = 0
    hours = {}

    for record in reader:
        users.add(record[0])
        items.add(record[1])

        result = record[2]
        if result > 0:
            positive += 1
        elif result < 0:
            negative += 1

        if not has_timestamp:
            continue

        timestamp = record[3]
        if begin_time > timestamp:
            begin_time = timestamp
        if end_time < timestamp:
            end_time = timestamp

        hour = timestamp2hour(timestamp)
        hours[hour] = hours.get(hour, 0) + 1

    # expose every local (users, items, positive, negative, begin_time,
    # end_time, hours, ...) as the summary dict
    return locals()
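
A minimal usage sketch (the file path is hypothetical; records are assumed to be (user, item, result, timestamp) rows). Because the function returns locals(), the summary is read back by variable name:

summary = get_summary_of_dataset('data/rec_log_train.txt')
print "%d users, %d items" % (len(summary['users']), len(summary['items']))
print "%d positive / %d negative interactions" % (summary['positive'], summary['negative'])
print "timestamps span %s .. %s" % (summary['begin_time'], summary['end_time'])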
Example #3
def get_summary_of_user_profile(filepath, skip_header=False):
    def get_fields(line):
        record = line.split(",")

        try:
            age = 2012 - int(record[1])
        except (ValueError, IndexError):
            # missing or malformed birth year
            age = 0
        user = int(record[0])
        gender = int(record[2])
        num_of_tweet = int(record[3])
        # build a list (not a lazy map) so len(tags) below works everywhere
        tags = [int(t) for t in record[4].strip().split(";")]

        return [user, age, gender, num_of_tweet, tags]

    reader = Reader(filepath, skip_header=skip_header, get_fields=get_fields)

    ages = {}
    genders = {}
    ntweet = {}
    ntags = {}

    for user, age, gender, num_of_tweet, tags in reader:
        num_of_tags = len(tags)
        ages[age] = ages.get(age, 0) + 1
        genders[gender] = genders.get(gender, 0) + 1
        ntweet[num_of_tweet] = ntweet.get(num_of_tweet, 0) + 1
        ntags[num_of_tags] = ntags.get(num_of_tags, 0) + 1

    return {
        'age': ages,
        'gender': genders,
        'tweet': ntweet,
        'tags': ntags,
    }
Example #5
    # note: the Settings() and Reader() defaults are created once, at
    # definition time, and shared across every call that omits them
    def __init__(self, settings=Settings(), reader=Reader()):
        self.settings = settings
        self.reader = reader
        self.train_data_loaded = False
        if self.settings.epe:
            self.best_epe = self.settings.epe
        else:
            self.best_epe = 100.0

        print("Dataset Path:", self.settings.dataset)
Example #6
def mean_average_precision(submission_path, solution_path):
    submission_data = Reader(submission_path, lambda line: line.strip().split(","))
    solution_data = Reader(solution_path, lambda line: line.strip().split(","))

    map3s = {}
    ap_sum = 0
    user_cnt = 0
    cur_type = None  # avoid shadowing the builtin `type`

    try:
        while True:
            _, items1 = submission_data.next()
            _, items2, ptype = solution_data.next()
            # records are grouped by type; flush the finished group
            if ptype != cur_type:
                if cur_type is not None:
                    map3s[cur_type] = {"ap_sum": ap_sum, "user_cnt": user_cnt, "mAP@3": ap_sum / user_cnt}
                ap_sum = 0
                user_cnt = 0
                cur_type = ptype
            user_cnt += 1

            items2 = items2.split()
            if len(items2) == 0:
                continue
            items1 = items1.split()
            if len(items1) == 0:
                continue
            ap = 0.0
            cnt = 0.0

            # average precision at cutoff 3, skipping duplicated predictions
            for i in xrange(min(3, len(items1))):
                if items1[i] in items2 and items1[i] not in items1[:i]:
                    cnt += 1
                    ap += cnt / (i + 1)

            n = min(3, len(items2))
            ap_sum += ap / n
    except StopIteration:
        pass

    if user_cnt:  # guard against empty input files
        map3s[cur_type] = {"ap_sum": ap_sum, "user_cnt": user_cnt, "mAP@3": ap_sum / user_cnt}
    return map3s
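
The AP@3 term accumulated in the loop can be isolated as a small standalone helper; a worked example (item ids are hypothetical):

def ap_at_3(predicted, actual):
    # average precision at cutoff 3, skipping duplicated predictions
    ap, cnt = 0.0, 0.0
    for i in xrange(min(3, len(predicted))):
        if predicted[i] in actual and predicted[i] not in predicted[:i]:
            cnt += 1
            ap += cnt / (i + 1)
    return ap / min(3, len(actual))

# hits at rank 1 (precision 1/1) and rank 3 (precision 2/3):
# (1.0 + 2/3) / 2 = 0.8333...
print ap_at_3(['a', 'b', 'c'], ['a', 'c'])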
def predict(train_path, test_path, outpath):
    test = Reader(test_path, skip_header=False)
    model = LFM(train_path).do_train()

    print "predict and write result...",
    t()
    with open(outpath, 'wb') as fp:
        for user, item, _ in test:
            r = model.predict(user, item)
            line = '\t'.join(map(str, [user, item, r])) + '\n'
            fp.write(line)
    t()
def gen_validation_set(inpath, train_path, validation_path, skip_header=False):
    reader = Reader(inpath, skip_header=skip_header)
    data_dir = os.path.dirname(inpath)

    train_set = open(os.path.join(data_dir, train_path), 'wb')
    validation_set = open(os.path.join(data_dir, validation_path), 'wb')

    for record in reader:
        timestamp = record[-1]  # avoid shadowing the time module
        line = ','.join(map(str, record)) + '\n'
        # last 7 days used as validation set
        # (1320336000 is 2011-11-04 00:00, UTC+8)
        if timestamp < 1320336000:
            train_set.write(line)
        else:
            validation_set.write(line)

    train_set.close()
    validation_set.close()
Example #10
def to_submission_format(predicted_path, outpath):
    def get_fields(line):
        return [
            float(field) if '.' in field else int(field)
            for field in line.split(',')
        ]

    data = Reader(predicted_path, get_fields)

    print "convert predict result to dict...",
    t()
    public = {}
    private = {}
    for user, item, r, timestamp in data:
        # 1321891200 is the public/private split point (2011-11-22 00:00, UTC+8)
        if timestamp < 1321891200:
            tmp = public
        else:
            tmp = private
        tmp.setdefault(user, []).append((r, item))
    t()

    def remove_duplicate(l):
        # order-preserving dedupe: set.add() returns None, so the `or`
        # keeps the first occurrence and records it in `has`
        has = set()
        return [x for x in l if not (x in has or has.add(x))]

    print "convert to submission format...",
    t()
    with open(outpath, 'wb') as fp:

        def write_to_file(d):
            for user in sorted(d.keys()):
                # sort by predicted r descending, keep the top 3 distinct items
                items = remove_duplicate(
                    [item for _, item in sorted(d[user], reverse=True)])[:3]
                fp.write("%s,%s\n" % (user, " ".join(map(str, items))))

        write_to_file(public)
        write_to_file(private)
    t()
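
remove_duplicate relies on set.add() returning None: the or-expression is falsy exactly once per distinct element, so the first occurrence survives and input order is preserved. A quick illustration (written as if the nested helper were module-level):

print remove_duplicate([3, 1, 3, 2, 1])  # -> [3, 1, 2]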
class LFM(object):
    def __init__(self, datapath):
        self.data = Reader(datapath, skip_header=False)
        self.get_data = lambda: self.data.sample(SAMPLES_NUMBER)
        self.b_u = {}
        self.b_i = {}
        self.q_i = {}
        self.p_u = {}
        self.avg_bu = 0
        self.avg_bi = 0

    def do_train(self):
        eta = ETA

        print "init LFM...",
        t()
        b_u, b_i, q_i, p_u = self.init_LFM()
        t()

        i = 1
        while i < 1 + TRAIN_REPEAT and not received_exit_signal():
            t()
            cnt = 0
            average_e = 0

            for record in self.get_data():
                user, item, result = record[:3]
                # map result from {-1, 1} to {0, 1}
                e = ((result + 1) >> 1) - self.predict(user, item)
                average_e += e * e
                cnt += 1

                # SGD step with L2 regularization on biases and factors
                b_u[user] += eta * (e - LAMBDA * b_u[user])
                b_i[item] += eta * (e - LAMBDA * b_i[item])
                for k in xrange(DIMENSION):
                    p = p_u[user][k]
                    q = q_i[item][k]
                    q_i[item][k] += eta * (e * p - LAMBDA * q)
                    p_u[user][k] += eta * (e * q - LAMBDA * p)

            average_e /= cnt
            print("%dth training pass took %.1fs\terror = %f" %
                  (i, t(False), average_e))
            i += 1

        print
        self.update_average_args()
        return self

    def init_LFM(self):
        # accumulate the global mean and create per-user/item parameters
        self.u = 0.0
        self.total = 0

        for record in self.data.get_all():
            user, item, result = record[:3]
            self.u += result
            self.total += 1

            if user not in self.b_u:
                self.b_u[user] = 0
                self.p_u.setdefault(user, self.random_qp())
            if item not in self.b_i:
                self.b_i[item] = 0
                self.q_i.setdefault(item, self.random_qp())

        self.u /= self.total
        return [self.b_u, self.b_i, self.q_i, self.p_u]

    def random_qp(self):
        # small random factors, scaled so the initial dot product stays O(1)
        return [random.random() / SQRT_DIMENSION for _ in xrange(DIMENSION)]

    def predict(self, user, item):
        qp = self.compute_qp(user, item)
        predict_r = self.get_b_ui(user, item) + qp
        return predict_r

    def compute_qp(self, u, i):
        self.q_i.setdefault(i, self.random_qp())
        self.p_u.setdefault(u, self.random_qp())
        return sum(self.q_i[i][k] * self.p_u[u][k] for k in xrange(DIMENSION))

    def get_b_ui(self, u, i):
        b_i = self.b_i.get(i, self.avg_bi)
        b_u = self.b_u.get(u, self.avg_bu)
        return self.u + b_i + b_u

    def update_average_args(self):
        bi = self.b_i.values()
        bu = self.b_u.values()
        self.avg_bi = sum(bi) / float(len(bi))
        self.avg_bu = sum(bu) / float(len(bu))
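
A minimal end-to-end sketch, assuming the module-level constants (SAMPLES_NUMBER, ETA, LAMBDA, DIMENSION, SQRT_DIMENSION, TRAIN_REPEAT) are defined and the training file holds (user, item, result) records; the path is hypothetical:

model = LFM('data/rec_log_train.txt').do_train()
# predictions live in [0, 1], since training maps result from {-1, 1} to {0, 1}
r = model.predict(user=12345, item=67890)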
Example #13
import numpy as np
from data_reader import Reader
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
import matplotlib.pyplot as plt
from MLP import MLP

x_train, x_test, y_train, y_test = Reader(
    'data/Iris Data.txt').load_train_data()
encoder = LabelEncoder()
# fit on the training labels only; test labels are assumed to be a subset
encoder.fit(y_train)
encoded_y_train, encoded_y_test = encoder.transform(
    y_train), encoder.transform(y_test)
y_train, y_test = np_utils.to_categorical(
    encoded_y_train), np_utils.to_categorical(encoded_y_test)

# Mean normalization: (x - mean) / (max - min) per feature, with statistics
# computed on the training set and reused for the test set
means = x_train.mean(axis=0)
ranges = x_train.max(axis=0) - x_train.min(axis=0)
x_train = (x_train - means) / ranges
x_test = (x_test - means) / ranges
import os

import numpy as np
import tensorflow as tf

from data_reader import Reader

if not os.path.isfile(weight_path):
    raise IOError("Error: Pre-trained model doesn't exist!")
if not os.path.isfile(src_data_path) or not os.path.isfile(tgt_data_path):
    raise IOError("Error: Data doesn't exist!")
if not os.path.isdir(train_method_city_dir):
    os.makedirs(train_method_city_dir)
if not os.path.isdir('./logfiles/'):
    os.mkdir('./logfiles/')

assert iter_size > 1, 'iter_size should be larger than 1!' 
assert city in ['Taipei', 'Roma', 'Tokyo', 'Rio', 'Denmark', 'syn2real'], 'Please check the city name!'
assert method in ['GA', 'GACA'], 'Please check the method name!'

reader = Reader(src_data_path, tgt_data_path, input_width=input_width, input_height=input_height, batch_size=batch_size)
model = FCN8VGG(weight_path)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# create additional class-specific loss layer
if city in ['Taipei', 'Roma', 'Tokyo', 'Rio', 'Denmark']:
    weak_loss = WeakLoss('Cityscapes') # create object and assign src dataset
else:
    weak_loss = WeakLoss('Synthia') 

def cal_grad_func_impl(x, grad):
    return weak_loss.diff * grad    # grad = 1.0 in lossLayer

def py_func(func, inp, Tout, stateful=True, name=None, grad_func=None):
    # register grad_func under a unique name, then build the py_func op with
    # its gradient overridden (the standard TF1 custom-gradient pattern)
    grad_name = 'PyFuncGrad_' + str(np.random.randint(0, 1e+8))
    tf.RegisterGradient(grad_name)(grad_func)
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": grad_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
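
A hedged usage sketch of the gradient-override helper above; the _grad adapter and the forward callable are illustrative, not taken from the original script:

def _grad(op, grad):
    # adapt cal_grad_func_impl to the (op, grad) signature that
    # tf.RegisterGradient expects
    return cal_grad_func_impl(op.inputs[0], grad)

# `weak_loss.loss` and `score_map` are hypothetical names
loss = py_func(weak_loss.loss, [score_map], tf.float32, grad_func=_grad)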
Example #15
from data_reader import Reader

# Initialize reader
reader = Reader()

# Read CSV data
reader.csv('data.csv')

# Split into X (features) and y (target) columns
X, y = reader.X_y_split(reader.data, 4)

X_train, X_test, y_train, y_test = reader.split_train_test(X, y)
    def _set_up_train_net_multigpu(self):
        with tf.device("/cpu:0"):
            # learning rate decay
            with tf.name_scope("lr_decay"):
                if LR_POLICY == 'staircase':
                    lr_breakpoints = [int(lbp) for lbp in LR_BREAKPOINT]
                    # decay factors are typically fractional (e.g. 0.1), so
                    # parse them as floats
                    lr_decays = [float(ld) for ld in LR_DECAY]
                    assert len(lr_breakpoints) == len(lr_decays)
                    pred_fn_pairs = []
                    for lr_decay, lr_breakpoint in zip(lr_decays,
                                                       lr_breakpoints):
                        # the outer lambda binds lr_decay by value rather
                        # than by the loop variable
                        fn = (lambda o: lambda: tf.constant(o, tf.float32)
                              )(lr_decay)
                        pred_fn_pairs.append((tf.less(self.global_step,
                                                      lr_breakpoint), fn))
                    lr_decay = tf.case(pred_fn_pairs,
                                       default=(lambda: tf.constant(1.0)))
                else:
                    logging.error("Unknown lr_policy: {}".format(LR_POLICY))
                    sys.exit(1)
                self.current_lr = lr_decay * BASE_LR
                tf.summary.scalar('lr', self.current_lr, collections=["brief"])

            # input data
            with tf.name_scope("input_data"):
                batch_size = BATCH_SIZE
                train_data_list = os.path.join(DATA_DIR, DATA_NAME)
                train_reader = Reader(train_data_list, is_training=True)
                train_batch = train_reader.dequeue(batch_size)
                sub_batch_size = int(batch_size / N_GPUs)
                logging.info('Batch size is {} on each of the {} GPUs'.format(
                    sub_batch_size, N_GPUs))
                sub_batches = []
                for i in range(N_GPUs):
                    sub_batch = {}
                    for k, v in train_batch.items():
                        sub_batch[k] = v[i * sub_batch_size:(i + 1) *
                                         sub_batch_size]
                    sub_batches.append(sub_batch)

            if OPTIMIZER == 'sgd':
                optimizer = tf.train.MomentumOptimizer(self.current_lr, 0.9)
                logging.info('Using SGD optimizer. Momentum={}'.format(0.9))
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(self.current_lr)
                logging.info('Using ADAM optimizer.')
            elif OPTIMIZER == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(self.current_lr)
                logging.info('Using RMSProp optimizer.')
            else:
                logging.critical('Unsupported optimizer {}'.format(OPTIMIZER))
                sys.exit(1)

            tower_grads = []
            tower_losses = []
            for i in range(N_GPUs):
                logging.info("Setting up tower %d" % i)
                with tf.device("/gpu:%d" % i):
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=(i > 0)):
                        with tf.name_scope("tower_%d" % i):
                            loss = self._tower_loss(sub_batches[i], LOSS_TYPE)
                            grads = optimizer.compute_gradients(loss)
                            tower_grads.append(grads)
                            tower_losses.append(loss)
            # sum the per-tower losses (tf.add_n takes a flat list of tensors)
            self.loss = tf.add_n(tower_losses)
            tf.summary.scalar("total loss", self.loss, collections=['brief'])
            with tf.name_scope("average_loss"):
                grads = self._average_gradients(tower_grads)
            with tf.variable_scope("optimizer"):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.train_op = optimizer.apply_gradients(
                        grads, global_step=self.global_step)

            for var in tf.global_variables():
                summary_name = 'parameters/' + var.name.split(':')[0]
                tf.summary.histogram(summary_name,
                                     var,
                                     collections=['detailed'])
            self.brief_summary_op = tf.summary.merge_all(key='brief')
            self.detailed_summary_op = tf.summary.merge_all(key="detailed")
        for i in range(len(y)):
            if y[i] == output[i]:
                correct += 1
        # accuracy as a percentage, plus the raw predictions
        return (float(correct) / float(len(y))) * 100, output


# main
# Get the features to draw from the user
a = int(input("Please Enter the first feature you want to train the data on: "))
b = int(input("Please Enter the second feature you want to train the data on: "))

# Get the two classes from the user
class1 = int(input("Please Enter a number from 1 to 3: "))
class2 = int(input("Please Enter another number from 1 to 3: "))

x_train, x_test, y_train, y_test = Reader('data/Iris Data.txt').load_train_data(a, b, class1, class2)

# Mean normalization for data
mean1, mean2 = np.mean(x_train[:, 0]), np.mean(x_train[:, 1])
min1, max1 = x_train[:, 0].min(), x_train[:, 0].max()
min2, max2 = x_train[:, 1].min(), x_train[:, 1].max()
x_train[:, 0], x_train[:, 1] = (x_train[:, 0] - mean1) / (max1 - min1), (x_train[:, 1] - mean2) / (max2 - min2)
x_test[:, 0], x_test[:, 1] = (x_test[:, 0] - mean1) / (max1 - min1), (x_test[:, 1] - mean2) / (max2 - min2)

# Build the model
slp = Perceptron()
slp.fit(x_train, y_train, learning_rate=0.02, epochs=10)
acc, predicted = slp.predict(x_test, y_test)
print(acc)

Example #18
settings.filters = args.filter
settings.kl = args.kernelsize
settings.s = args.strides
settings.batch_norm = args.batchnorm
settings.depth = args.depth
settings.powerDepth = args.powerDepth
ldir = []
rdir = []
ddir = []
for d in range(args.datanum):
    pathd = os.path.join(settings.dataset, str(d))
    ldir = np.append(ldir, os.path.join(pathd, "left"))
    rdir = np.append(rdir, os.path.join(pathd, "right"))
    ddir = np.append(ddir, os.path.join(pathd, "depth"))
print("The number of images: %i" % args.imgnum)
reader = Reader(ldir=ldir, rdir=rdir, ddir=ddir)
predictor = Predictor(settings=settings, reader=reader)
# an epoch covers 1000 images
maxEpochs = 500
numPerIter = 100
Iter = 0
for epoch in range(0, maxEpochs):
    img_n = range(0, args.imgnum)
    while len(img_n) > 0:
        print(len(img_n))
        if len(img_n) > numPerIter:
            # pick numPerIter random images from the dataset
            sample_ids = random.sample(img_n, numPerIter)
            print("Initialized input data")
            reader.re_inti(list=sample_ids)