示例#1
0
    def __getitem__(self, idx):
        """Assemble batch number ``idx`` of RGB inputs and depth targets.

        Every sample is decoded from ``self.data`` as a 480x640 RGB image and
        a 480x640 depth map. The RGB image is then resampled either with
        ``sampling`` (when ``self.nus`` is set) or on an evenly spaced grid.
        The depth map is resampled the same way only when ``self.locations``
        is falsy; otherwise it is kept at full resolution.

        Args:
            idx: Index of the batch. Sample indices past the end of the
                dataset are clamped to ``self.N - 1``, so a trailing batch
                repeats the last sample instead of being short.

        Returns:
            A ``(batch_x, batch_y)`` tuple. ``batch_x`` is a float32 array
            with three RGB channels, plus two channels holding the sampling
            locations when ``self.locations`` is truthy. ``batch_y`` is a
            pre-sized float32 array in the default mode, or a zero-padded
            stack of the per-sample depth maps when ``self.locations`` is
            truthy.
        """
        shape_rgb = self.shape_rgb
        shape_depth = self.shape_depth
        with_locations = self.locations

        batch_x = np.zeros(shape_rgb[:3] + (5 if with_locations else 3, ),
                           np.float32)
        if with_locations:
            batch_y = []  # per-sample sizes can vary; padded after the loop
        else:
            batch_y = np.zeros(shape_depth[:3] + (1, ), np.float32)

        for i in range(self.batch_size):
            index = min((idx * self.batch_size) + i, self.N - 1)

            sample = self.dataset[index]

            # RGB scaled to [0, 1]; raw depth divided by 10 before DepthNorm.
            x = np.clip(
                np.asarray(Image.open(BytesIO(self.data[sample[0]]))).reshape(
                    480, 640, 3) / 255, 0, 1)
            # astype() already returns a fresh array, so no extra copy() is
            # needed to detach from the decoded image buffer.
            y = np.asarray(Image.open(BytesIO(self.data[sample[1]])),
                           dtype=np.float32).reshape(
                               480, 640, 1).astype(float) / 10.0
            y = DepthNorm(y, maxDepth=self.maxDepth)

            # adjust sizes
            if self.nus is not None:
                u, v = sampling(y[:, :, 0], shape_rgb[1:3], self.nus)
                locations = [v, u]
                x = x[v.astype(int), u.astype(int), :]
                if not with_locations:
                    u, v = sampling(y[:, :, 0], shape_depth[1:3], self.nus)
                    y = y[v.astype(int), u.astype(int), :]
            else:
                rows, cols = x.shape[:2]
                # A complex step in mgrid means "this many points, end
                # inclusive", i.e. an evenly spaced grid over the image.
                locations = np.mgrid[:rows - 1:shape_rgb[1] * 1j,
                                     :cols - 1:shape_rgb[2] * 1j]
                grid = np.round(locations).astype(int)
                x = x[grid[0], grid[1], :]
                if not with_locations:
                    grid = np.round(
                        np.mgrid[:rows - 1:shape_depth[1] * 1j,
                                 :cols - 1:shape_depth[2] * 1j]).astype(int)
                    y = y[grid[0], grid[1]]

            batch_x[i, ..., :3] = x
            if with_locations:
                batch_x[i, ..., 3:] = np.stack(locations, -1)
                batch_y.append(y)
            else:
                # y keeps its trailing channel axis (H, W, 1), so fill the
                # whole slot. Writing it into batch_y[i, ..., 0], which is
                # (H, W), cannot broadcast and raises ValueError.
                batch_y[i] = y

        if with_locations:
            # Pad every depth map on the bottom/right with zeros up to the
            # largest height and width in the batch so they can be stacked.
            max_h, max_w = np.max([d.shape[:2] for d in batch_y], axis=0)
            batch_y = np.stack([
                np.pad(d, [(0, max_h - d.shape[0]), (0, max_w - d.shape[1]),
                           (0, 0)], 'constant') for d in batch_y
            ])

        return batch_x, batch_y
    def call(self,
             inputs,
             decoder_type='argmax',
             k=25,
             p=0.9,
             temperature=0.8):
        """Run one incremental decoding step and choose the next token.

        Args:
            inputs: indexable holding, in this order:
                token: the previous token,
                    shape = (1, 1, 2 (ID and position))
                mem_k: memorized keys, shape = (number of layers, 1,
                    number of heads, attn hidden size, context size)
                mem_v: memorized values, shape = (number of layers, 1,
                    number of heads, context size, attn hidden size)
                length: an integer, the size of the previous tokens
            decoder_type: 'argmax', 'top-k' or 'nucleus'; any other value
                falls back to ``sampling`` with ``temperature``.
            k: forwarded to ``top_k_sampling`` when decoder_type is 'top-k'.
            p: forwarded to ``nucleus_sampling`` when decoder_type is
                'nucleus'.
            temperature: forwarded to ``top_k_sampling`` and ``sampling``.

        Returns:
            next_token: shape = (1, 1, 2 (ID and position))
            mem_k: shape = (number of layers, 1, number of heads, attn hidden size, context size)
            mem_v: shape = (number of layers, 1, number of heads, context size, attn hidden size)
        """

        token = inputs[0]
        mem_k = inputs[1]
        mem_v = inputs[2]
        length = inputs[3]

        new_mem_k = []
        new_mem_v = []

        hidden = self.embed(token)

        for i, block in enumerate(self.transformer_stack):
            # Deliberately not unpacked into ``k``/``v``: rebinding ``k``
            # here would overwrite the top-k parameter, and top_k_sampling
            # would then receive a key tensor instead of the requested k.
            hidden, layer_k, layer_v = block(hidden, mem_k[i], mem_v[i],
                                             length)
            new_mem_k.append(layer_k)
            new_mem_v.append(layer_v)

        mem_k = tf.stack(new_mem_k)
        mem_v = tf.stack(new_mem_v)

        # Project the hidden state onto the first n_vocab rows of the
        # embedding matrix to obtain one logit per vocabulary entry.
        logit = tf.reshape(
            tf.matmul(hidden[0, :, :],
                      self.embed.we[:self.n_vocab, :],
                      transpose_b=True), [self.n_vocab])

        if decoder_type == 'argmax':
            next_token = argmax(logit)

        elif decoder_type == 'top-k':
            next_token = top_k_sampling(logit, k, temperature)

        elif decoder_type == 'nucleus':
            next_token = nucleus_sampling(logit, p)

        else:
            next_token = sampling(logit, temperature)

        return next_token, mem_k, mem_v
示例#3
0
    def __getitem__(self, idx, is_apply_policy=True):
        """Build batch ``idx`` of RGB/depth pairs, optionally augmented.

        Args:
            idx: Index of the batch. Sample indices past the end of the
                dataset are clamped to ``self.N - 1``.
            is_apply_policy: When truthy, every filled slot is passed through
                ``self.policy`` and replaced by its result.

        Returns:
            ``(batch_x, batch_y)``: arrays created with ``np.zeros`` using
            ``self.shape_rgb`` and ``self.shape_depth`` and filled sample by
            sample.
        """
        batch_x = np.zeros(self.shape_rgb)
        batch_y = np.zeros(self.shape_depth)

        for slot in range(batch_x.shape[0]):
            entry = self.dataset[min((idx * self.batch_size) + slot,
                                     self.N - 1)]

            # Decode both images; RGB is scaled to [0, 1] and depth to
            # [0, maxDepth].
            rgb = np.asarray(Image.open(BytesIO(self.data[entry[0]])))
            x = np.clip(rgb.reshape(480, 640, 3) / 255, 0, 1)
            depth = np.asarray(Image.open(BytesIO(self.data[entry[1]])))
            y = np.clip(
                depth.reshape(480, 640, 1) / 255 * self.maxDepth, 0,
                self.maxDepth)

            if self.nus is None:
                x = nyu_resize(x, 480)
                y = nyu_resize(y, 240)
            else:
                # Both resamplings are driven by the full-resolution depth.
                u, v = sampling(y[:, :, 0], self.shape_rgb[1:3], self.nus)
                x = x[v.astype(int), u.astype(int), :]
                u, v = sampling(y[:, :, 0], self.shape_depth[1:3], self.nus)
                y = y[v.astype(int), u.astype(int), :]

            y = DepthNorm(y, maxDepth=self.maxDepth)

            batch_x[slot] = x
            batch_y[slot] = y

            if is_apply_policy:
                augmented = self.policy(batch_x[slot], batch_y[slot])
                batch_x[slot], batch_y[slot] = augmented

        return batch_x, batch_y
示例#4
0
文件: SVD.py 项目: Daftstone/APT
 def __init__(self, num_users, num_items, dataset, extend):
     """Record the model dimensions and cache views of the training matrix.

     Args:
         num_users: stored as ``self.num_users``.
         num_items: stored as ``self.num_items``.
         dataset: object exposing ``trainMatrix``, which must provide
             ``tocoo()`` and ``toarray()``.
         extend: stored as ``self.extend``.
     """
     # Dimensions and fixed hyper-parameters.
     self.num_users, self.num_items = num_users, num_items
     self.num_factors = FLAGS.embed_size
     self.reg = 1e-12

     # Training data: the COO view supplies the stored ratings, whose mean
     # is kept both as a NumPy scalar and as a TensorFlow constant.
     self.dataset = dataset
     train_matrix = dataset.trainMatrix
     self.coo_mx = train_matrix.tocoo()
     self.mu_np = np.mean(self.coo_mx.data)
     self.mu = tf.constant(self.mu_np, shape=[], dtype=tf.float32)

     self.extend = extend
     self.samples = utils.sampling(dataset, 0)

     # Dense copy of the ratings plus a boolean mask of non-zero entries.
     self.trainmatrix = train_matrix.toarray()
     self.rate_mask = self.trainmatrix != 0
     self.type = 'adv'
    def call(self,
             inputs,
             decoder_type='argmax',
             k=25,
             p=0.9,
             temperature=0.8):
        """Encode the whole context once and choose the next token.

        Args:
            inputs: indexable holding, in this order:
                length: an integer; the position along the context axis
                    whose hidden state is used to predict the next token
                tokens: shape = (batch size, context length,
                    2 (IDs and positions))
            decoder_type: 'argmax', 'top-k' or 'nucleus'; any other value
                falls back to ``sampling`` with ``temperature``.
            k: forwarded to ``top_k_sampling`` when decoder_type is 'top-k'.
            p: forwarded to ``nucleus_sampling`` when decoder_type is
                'nucleus'.
            temperature: forwarded to ``top_k_sampling`` and ``sampling``.

        Returns:
            next_token: shape = (1, 1, 2 (ID and position))
            mem_k: shape = (number of layers, 1, number of heads, attn hidden size, context size)
            mem_v: shape = (number of layers, 1, number of heads, context size, attn hidden size)
        """

        length = inputs[0]
        tokens = inputs[1]
        hidden = self.embed(tokens)
        mem_k, mem_v = [], []

        for block in self.transformer_stack:
            # Deliberately not unpacked into ``k``/``v``: rebinding ``k``
            # here would overwrite the top-k parameter, and top_k_sampling
            # would then receive a key tensor instead of the requested k.
            hidden, layer_k, layer_v = block(hidden)
            mem_k.append(layer_k)
            mem_v.append(layer_v)

        mem_k = tf.stack(mem_k)
        mem_v = tf.stack(mem_v)

        # The reshape to [n_vocab] only succeeds when the selected slice
        # holds exactly n_vocab logits, i.e. for a single sequence.
        logit = tf.reshape(
            tf.matmul(hidden[:, length, :],
                      self.embed.we[:self.n_vocab, :],
                      transpose_b=True), [self.n_vocab])

        if decoder_type == 'argmax':
            next_token = argmax(logit)

        elif decoder_type == 'top-k':
            next_token = top_k_sampling(logit, k, temperature)

        elif decoder_type == 'nucleus':
            next_token = nucleus_sampling(logit, p)

        else:
            next_token = sampling(logit, temperature)

        return next_token, mem_k, mem_v
示例#6
0
文件: pure_SVD.py 项目: Daftstone/APT
    def train(self, dataset, is_train, nb_epochs, weight1, use_weight=True):
        """Train in a fresh session and return the last evaluation result.

        Args:
            dataset: passed to ``utils.sampling`` and ``utils.train_evalute``.
            is_train: not used by this method.
            nb_epochs: number of passes over the sampled training data. The
                final epoch is always evaluated; with zero epochs nothing is
                evaluated and the return statement fails.
            weight1: not used by this method.
            use_weight: not used by this method.

        Returns:
            The ``(hr, ndcg)`` pair from the most recent evaluation.
        """
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=session_config)

        ckpt_save_path = "pretrain/%s/%s/embed_%d/model_%s_%s_%d" % (
            FLAGS.dataset, FLAGS.rs, FLAGS.embed_size, FLAGS.attack_type,
            FLAGS.gpu, FLAGS.target_item[0])
        if not os.path.exists(ckpt_save_path):
            os.makedirs(ckpt_save_path)

        # Created for checkpointing; the save call below is disabled.
        saver_ckpt = tf.train.Saver()

        self.sess.run(tf.global_variables_initializer())

        samples = utils.sampling(dataset, 0)
        print("all", samples[0].shape)

        final_epoch = nb_epochs - 1
        for cur_epochs in range(nb_epochs):
            # One optimisation step per mini-batch.
            for users, items, rates in utils.get_batchs(
                    samples, FLAGS.batch_size):
                self.sess.run([self.train_op], {
                    self.users_holder: users,
                    self.items_holder: items,
                    self.ratings_holder: rates,
                })

            is_final = cur_epochs == final_epoch
            if cur_epochs % FLAGS.per_epochs != 0 and not is_final:
                continue

            # yelp and music use the partial rating op for intermediate
            # evaluations; the final epoch always uses the full one.
            if FLAGS.dataset in ('yelp', 'music') and not is_final:
                rate = self.sess.run(self.rate_partial)
            else:
                rate = self.sess.run(self.rate)
            hr, ndcg = utils.train_evalute(rate, dataset, cur_epochs)
            # utils.save_model(ckpt_save_path, saver_ckpt, self.sess)
        return hr, ndcg
示例#7
0
import sklearn
import numpy as np
from utils import sampling



if __name__ == "__main__":
    # Load the label matrix and the feature matrix, then pick a tenth of
    # the rows as the labeled subset.
    light = np.load("lights.npy")
    xdata = np.load("X_data.npy")
    tenper = len(xdata) // 10
    unlabeled, labeled = sampling(len(xdata), tenper)

    labeled_data = xdata[labeled, :]
    label = light[labeled, :]

    # Print the labeled samples of each class in turn.
    # Assumes each row of `light` is a 4-element one-hot vector — TODO confirm.
    for i in range(4):
        label_feature = np.zeros(4)
        label_feature[i] = 1
        # Compare whole rows: an element-wise np.where on the 2-D comparison
        # would return (row, col) pairs for every matching element and index
        # single scalars out of labeled_data instead of selecting rows.
        label_index = np.where(np.all(label == label_feature, axis=1))[0]
        print(labeled_data[label_index])


def main():
    """Build a directed user graph from tweets and write it to a file.

    Command-line options select the names/clusters files, the date window,
    the tweet sampling rate, the influencer, retweet-count and degree
    thresholds, the time grouping (month or week) and the output filename.
    Positional arguments are forwarded to ``load_tweet``.

    Every edge carries a ``weight`` (number of tweets in its group) and one
    boolean attribute per time group; every surviving node carries a label,
    a cluster and the same per-group booleans. The graph is written as GEXF
    when the output filename ends in ``.gexf`` and as node-link JSON
    otherwise, and the node/edge counts are printed.
    """
    parser = OptionParser()
    parser.add_option('-o', dest='filename', default='graph.gexf',
                      help='output filename')
    parser.add_option('--names', dest='names', default='data/names.json',
                      help='names filename')
    parser.add_option('--clusters', dest='clusters',
                      default='data/clusters.json',
                      help='clusters filename')
    parser.add_option('--start', dest='start', default='20110301',
                      help='start date (default=20110301)')
    parser.add_option('--stop', dest='stop', default='20130401',
                      help='stop date (default=20130401)')
    parser.add_option('--sampling', dest='sampling', type='float', default=1.0,
                      help='tweet sampling rate (default=1.0)')
    parser.add_option('--influencers', dest='n', type='int', default=None,
                      help='maximum number of influencers (default=inf)')
    parser.add_option('--mincount', dest='mincount', type='int', default=1,
                      help='minimum number of retweet (default=1)')
    parser.add_option('--mindegree', dest='mindegree', type='int', default=0,
                      help='minimum acceptable degree of nodes (default=0)')
    parser.add_option('--group', dest='group', default='month',
                      help='month or week (default=month)')
    options, args = parser.parse_args()

    names = load_names(options.names)
    clusters = load_clusters(options.clusters)
    start_date = ymd_to_datetime(options.start)
    stop_date = ymd_to_datetime(options.stop)

    tweets = load_tweet(*args)
    tweets = filter_by_datetime(tweets, start_date, stop_date)
    random.seed(0)  # fixed seed so the tweet sample is reproducible
    tweets = sampling(tweets, options.sampling)

    # Any --group value other than 'month' is treated as weekly grouping.
    between = months_between if options.group == 'month' else weeks_between
    groups = [t.strftime('t%Y%m%d') for t in between(start_date, stop_date)]

    graph = nx.DiGraph()
    graph.graph['groups'] = groups
    for ruid, tss in group_by_influencer(tweets, options.n, options.mincount):
        for uid, ts in group_by_user(tss):
            ts = list(ts)
            labels = dict.fromkeys(groups, False)
            for t in ts:
                d = to_datetime(t['created_at']).strftime('t%Y%m%d')
                # The group keys sort chronologically as strings, so bisect
                # finds the group whose start date is the latest one <= d.
                labels[groups[bisect_right(groups, d) - 1]] = True
            graph.add_edge(ruid, uid, weight=len(ts), **labels)

    # Iterate over a snapshot: nodes are removed inside the loop, and
    # mutating the graph while walking its live node container is unsafe.
    for node in list(graph.nodes()):
        if graph.degree(node, 'weight') < options.mindegree:
            graph.remove_node(node)
            continue
        graph.node[node]['label'] = names.get(node, '')
        graph.node[node]['cluster'] = clusters.get(node, -1)
        if graph.out_degree(node) == 0:
            # A node without outgoing edges takes the most frequent cluster
            # among its predecessors instead of its own entry.
            cluster_count = {}
            for ruid in graph.predecessors(node):
                c = clusters.get(ruid, -1)
                cluster_count[c] = cluster_count.get(c, 0) + 1
            graph.node[node]['cluster'] = max(cluster_count.items(),
                                              key=lambda r: r[1])[0]
        for d in groups:
            graph.node[node][d] = False

    # A node is active in a group when any of its edges is.
    for d in groups:
        for u, v in graph.edges():
            if graph[u][v][d]:
                graph.node[u][d] = True
                graph.node[v][d] = True

    out_format = options.filename.split('.')[-1]
    if out_format == 'gexf':
        nx.write_gexf(graph, options.filename)
    else:
        obj = json_graph.node_link_data(graph)
        # Close the file deterministically so the JSON is fully flushed.
        with open(options.filename, 'w') as out_file:
            json.dump(obj, out_file)
    print('(|V|, |E|) = ({}, {})'.format(graph.number_of_nodes(),
                                         graph.number_of_edges()))
    # Verifica se caminho existe
    root = os.path.join(output_path, sample, disease)
    if not os.path.exists(root):
        os.makedirs(root)

    # Copia imagens para pasta de destino
    for image in tqdm(paths):
        shutil.copy2(image, root)


if __name__ == '__main__':
    # Command-line entry point: sample patient ids found under the input
    # folder and copy their images into the chosen output split.
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("--input_path",
                        help="Input path with main disease folder",
                        required=True)
    parser.add_argument("--output_path",
                        help="Output path to send sampled images",
                        required=True)
    parser.add_argument("--sample",
                        help="Type of sample",
                        choices=["train", "validation", "test"],
                        required=True)
    args = parser.parse_args()

    # The disease name is the last component of the input path. Normalise
    # first so a trailing separator ("data/flu/") does not yield an empty
    # name, which would drop the images straight into the sample folder.
    disease = os.path.basename(os.path.normpath(args.input_path))
    patient_ids, images = get_unique_ids_and_paths(args.input_path)
    ids = sampling(patient_ids)
    save_images(ids, images, disease, args.sample, args.output_path)
示例#10
0
文件: main.py 项目: JunxuLiu/PFA
def main(unused_argv):
  """Run the federated training loop configured through ``FLAGS``.

  Loads the dataset, builds the per-client index sets, optionally sets up
  per-client privacy budgets (``FLAGS.dpsgd``), then for each of
  ``FLAGS.max_steps / FLAGS.local_steps`` communication rounds runs the
  local updates of the participating clients, aggregates them on the server,
  evaluates the global model on the test split and saves the progress.

  Args:
    unused_argv: not read by this function; presumably supplied by the app
      runner — TODO confirm.
  """

  print(FLAGS.model)
  project_path = os.getcwd()
  # load dataset
  x_train, y_train, x_test, y_test = load_dataset(FLAGS.dataset, project_path)
  print('x_train:{} y_train:{} / x_test:{}, y_test:{}'.format(len(x_train), len(y_train), len(x_test), len(y_test)))
  # split data: the client sets live under
  # <cwd>/dataset/<dataset>/clients/<iid|noniid>/v<version>
  client_set_path = os.path.join(project_path, 
                                 'dataset', FLAGS.dataset, \
                                 'clients', \
                                 ('noniid' if FLAGS.noniid else 'iid'), \
                                 'v{}'.format(FLAGS.version))
  #client_set_path = project_path + '/dataset/' + FLAGS.dataset + '/clients/' + ('noniid' if FLAGS.noniid else 'iid')
  # default: split the training set evenly across the N clients
  client_dataset_size = len(x_train) // FLAGS.N if FLAGS.client_dataset_size is None else FLAGS.client_dataset_size
  if not FLAGS.noniid:
    client_set = create_iid_clients(FLAGS.N, len(x_train), 10, client_dataset_size, client_set_path)
  else:  
    client_set = create_noniid_clients(FLAGS.N, len(x_train), 10, client_dataset_size, client_set_path)
  print('client dataset size: {}'.format(len(client_set[0])))

  COMM_ROUND = int(FLAGS.max_steps / FLAGS.local_steps)
  print('communication rounds:{}'.format(COMM_ROUND))

  # set personalized privacy budgets  
  if FLAGS.dpsgd:
    # 'epsilons' / 'epsilonsu' are handled with is_distributions=False;
    # every other FLAGS.eps value with is_distributions=True.
    if (FLAGS.eps == 'epsilons' or FLAGS.eps == 'epsilonsu'):
        epsilons, threshold = set_epsilons(FLAGS.eps, FLAGS.N, is_distributions = False)
    else:
        epsilons, threshold = set_epsilons(FLAGS.eps, FLAGS.N, is_distributions = True)

    print('epsilons:{}, \nthreshold:{}'.format(epsilons, threshold))

    # One noise multiplier per client, derived from its batch sampling
    # ratio q, the total number of steps, delta and its own epsilon.
    noise_multiplier = []
    for i in range(FLAGS.N):
      q = FLAGS.client_batch_size / len(client_set[i])
      nm = 10 * q * math.sqrt(FLAGS.max_steps * FLAGS.sample_ratio * (-math.log10(FLAGS.delta))) / epsilons[i]
      noise_multiplier.append(nm)
    print('noise_multiplier:', noise_multiplier)
    budgets_accountant = BudgetsAccountant(FLAGS.N, epsilons, FLAGS.delta, noise_multiplier, FLAGS.local_steps, threshold)


  # m: number of clients requested per round (all of them when no sample
  # mode is configured).
  if FLAGS.sample_mode is None:
    m = FLAGS.N
  else:
    m = int(FLAGS.sample_ratio * FLAGS.N)
  print('number of clients per round: {}'.format(m))
 
  start_time = time.time()

  # test accuracy after each communication round
  accuracy_accountant = []
  # define tensors and operators in a fresh default graph
  with tf.Graph().as_default():
    # build model
    if FLAGS.dpsgd:
      train_op_list, eval_op, loss, data_placeholder, labels_placeholder = nets.mnist_model(FLAGS, \
                epsilons, noise_multiplier)
    else:
      train_op_list, eval_op, loss, data_placeholder, labels_placeholder = nets.mnist_model(FLAGS)

    # increase and set global step
    increase_global_step, set_global_step = global_step_creator()

    # dict mapping, for each tf.trainable_variables() entry, its feed
    # placeholder name to a float32 placeholder of the variable's shape.
    model_placeholder = dict(zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()],
                                 [tf.placeholder(name=Vname_to_Pname(var),
                                                 shape=var.get_shape(),
                                                 dtype=tf.float32)
                                  for var in tf.trainable_variables()]))

    # all trainable variables are set to the value specified through
    # the placeholders in 'model_placeholder'.
    assignments = [tf.assign(var, model_placeholder[Vname_to_FeedPname(var)]) for var in
                   tf.trainable_variables()]

    #init = tf.global_variables_initializer()
    with tf.Session(config=tf.ConfigProto(
            log_device_placement=False, \
            allow_soft_placement=True, \
            gpu_options=tf.GPUOptions(allow_growth=True))) as sess:

      #sess.run(tf.global_variables_initializer())
      # NOTE(review): tf.initialize_all_variables is the deprecated spelling
      # of tf.global_variables_initializer — consider switching.
      sess.run(tf.initialize_all_variables())
      # initial global model and errors: 'model' doubles as the feed_dict
      # used to load variable values back into the graph.
      model = dict(zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()],
                       [sess.run(var) for var in tf.trainable_variables()]))
      model['global_step_placeholder:0'] = 0
      #errors = [ np.zeros(var.get_shape()) for var in tf.trainable_variables()]
      # NOTE(review): with error feedback enabled, 'errors' starts as the
      # current values of 'model', including the global-step entry added
      # just above — confirm this is the intended initial state.
      errors = list(model.values()) if FLAGS.error_feedback else [0]*len(tf.trainable_variables())
      #server.set_global_model(model)

      # initial server aggregation
      #w = weights if FLAGS.wei_avg else None
      server = ServerAggregation(model, FLAGS.dpsgd, FLAGS.projection, FLAGS.proj_dims, FLAGS.lanczos_iter, FLAGS.wei_avg)

      # initial local update
      local = LocalUpdate(x_train, y_train, client_set, FLAGS.client_batch_size, data_placeholder, labels_placeholder)

      for r in range(COMM_ROUND):
        print_new_comm_round(r)
        comm_start_time = time.time()
        # select the participating clients
        if FLAGS.dpsgd:
          participating_clients = sampling(FLAGS.N, m, client_set, FLAGS.client_batch_size, \
                                           FLAGS.sample_mode, budgets_accountant)
        else:
          # without DP-SGD every client takes part in every round
          participating_clients = range(FLAGS.N) # temporary

        # stop when the training condition cannot be satisfied
        # (no public clients or not enough candidates).
        if not len(participating_clients):
          print("the condition of training cannot be satisfied. (no public clients or no sufficient candidates.")
          print('Done! The procedure time:', time.time() - start_time)
          break
        print(participating_clients)

        ############################################################################################################
        # For each participating client c:
        for c in participating_clients:
        
          #########################################################################################################
          # Start local update
          # local.update receives the assignment ops and the current 'model'
          # feed_dict together with client c's own train op.
          #sess.run(assignments, feed_dict=model)
          update = local.update(sess, assignments, c, model, FLAGS.local_steps, train_op_list[c])
          # without DP-SGD every client is treated as public
          server.aggregate(c, update, is_public = (c in budgets_accountant._public if FLAGS.dpsgd else True))

          if FLAGS.dpsgd:
            print('For client %d and delta=%f, the budget is %f and the used budget is: %f' %
               (c, float(FLAGS.delta), epsilons[c], budgets_accountant.get_accumulation(c)))
          #print('local update procedure time:', time.time() - start_time)
          # End of the local update
          ############################################################################################################

        # choose the aggregation weights: uniform for fedavg, proportional
        # to each participant's epsilon for wei_avg, otherwise None.
        if FLAGS.fedavg:
          n_clients = len(participating_clients)
          w = np.array([1/n_clients] * n_clients)
          print(w)
        elif FLAGS.wei_avg:
          # NOTE(review): 'epsilons' is only assigned when FLAGS.dpsgd is
          # set, so wei_avg without dpsgd raises NameError here.
          epsSubset = np.array(epsilons)[participating_clients]
          eps_sum = sum(epsSubset)
          w = np.array([eps/eps_sum for eps in epsSubset])
          print(epsSubset, w)
        else:
          w = None

        # average the client updates into the new global model
        if FLAGS.error_feedback:
          model, errors = server.fedavg(model, errors, w)
        else:
          model = server.fedavg(model, None, w)

        # Setting the trainable Variables in the graph to the values stored in feed_dict 'model'
        # and advancing the global step.
        sess.run(assignments + [increase_global_step], feed_dict=model)

        # evaluate the (current) global model on the test split.
        # create a feed-dict holding the test data and labels.
        feed_dict = {str(data_placeholder.name): x_test,
                     str(labels_placeholder.name): y_test}

        # compute the loss and the accuracy on the test split.
        global_loss = sess.run(loss, feed_dict=feed_dict)
        count = sess.run(eval_op, feed_dict=feed_dict)
        accuracy = float(count) / float(len(y_test))
        accuracy_accountant.append(accuracy)
        print_loss_and_accuracy(global_loss, accuracy, stage='test')
        print('time of one communication:', time.time() - comm_start_time)        
        if FLAGS.dpsgd:
          save_progress(FLAGS, model, accuracy_accountant, budgets_accountant.get_global_budget())
        else:
          save_progress(FLAGS, model, accuracy_accountant)
        

    print('Done! The procedure time:', time.time() - start_time)
示例#11
0
文件: SVD.py 项目: Daftstone/APT
    def train(self, dataset, is_train, nb_epochs, weight1, use_weight=True):
        """Train with periodically regenerated fake users and evaluate.

        After ``pre_training`` epochs, and then every ``per_epochs`` epochs,
        a fresh set of ``self.extend`` fake user profiles is generated and
        masked down to ``select_num`` items per user. ``utils.estimate_dataset``
        combines them with ``self.dataset`` and the training samples are
        redrawn from the result. In ``'adv'`` mode the items are drawn among
        those with negative influence, with a softmax-style weighting; in
        any other mode they are drawn uniformly.

        Args:
            dataset: dataset used for the initial samples and for evaluation
                until the first regeneration replaces it.
            is_train: not used by this method.
            nb_epochs: number of training epochs. The final epoch is always
                evaluated; with zero epochs nothing is evaluated and the
                return statement fails.
            weight1: not used by this method.
            use_weight: not used by this method.

        Returns:
            The ``(hr, ndcg)`` pair from the most recent evaluation.

        Raises:
            ValueError: if ``FLAGS.dataset`` has no training schedule here.
        """
        # Per-dataset schedule:
        # (ts, per_epochs, pre_training, select_num, up)
        presets = {
            'ml-100k': (20., 2, 15, 400, 0.2),
            'filmtrust': (40., 3, 4, 100, 0.25),
            'ml-1m': (20., 2, 15, 700, 0.2),
            'yelp': (10., 2, 10, 2500, 0.2),
        }
        if FLAGS.dataset not in presets:
            # Fail early and clearly instead of hitting a NameError on the
            # unassigned schedule variables inside the epoch loop.
            raise ValueError("no training schedule defined for dataset %r" %
                             (FLAGS.dataset, ))
        ts, per_epochs, pre_training, select_num, up = presets[FLAGS.dataset]

        ckpt_save_path = "pretrain/%s/%s/embed_%d/model_%s_%d" % (
            FLAGS.dataset, FLAGS.rs, FLAGS.embed_size, FLAGS.gpu,
            FLAGS.target_item[0])
        if not os.path.exists(ckpt_save_path):
            os.makedirs(ckpt_save_path)

        # Created for checkpointing; the save call below is disabled.
        saver_ckpt = tf.train.Saver()

        # initialize test data for Evaluate
        samples = utils.sampling(dataset, 0)

        all_users = self.dataset.trainMatrix.toarray()
        # NOTE(review): the result is never read in this method; the call is
        # kept in case utils.cal_neighbor has side effects — confirm and
        # remove if it does not.
        ref_users_idx = utils.cal_neighbor(all_users, all_users, 1)[0]

        for cur_epochs in range(nb_epochs):
            if (cur_epochs % per_epochs == pre_training % per_epochs
                    and cur_epochs >= pre_training):
                if self.type == 'adv':
                    influence = self.influence_user(
                        self.dataset, self.dataset.trainMatrix.toarray())
                    inf_copy = influence.copy()
                    # Despite its name, pos_idx holds the items whose
                    # influence is negative.
                    pos_idx = np.where(influence < 0)[0]
                    influence = influence[pos_idx]
                    # normalization: min-max scale, then shift to [-1, 0]
                    influence = ((influence - np.min(influence)) /
                                 (np.max(influence) - np.min(influence)) - 1)
                    fake_users = utils.generate_fake(self.extend, self.dataset)
                    # fake_users += np.ones_like(fake_users) * up * inf_sign
                    fake_users += up
                    mask = np.zeros((self.extend, self.num_items))
                    inf_idx = np.argsort(inf_copy)
                    # Selection probabilities: the more negative the
                    # normalised influence, the larger the weight; ts sets
                    # how sharply the distribution is peaked.
                    weights = np.exp(-influence * ts)
                    p = weights / np.sum(weights)
                    print(p[0], np.max(p), np.min(p))
                    # Rank of the first four target items in the influence
                    # ordering.
                    print(
                        np.where(inf_idx == FLAGS.target_item[0])[0],
                        np.where(inf_idx == FLAGS.target_item[1])[0],
                        np.where(inf_idx == FLAGS.target_item[2])[0],
                        np.where(inf_idx == FLAGS.target_item[3])[0])
                    for kk in range(len(mask)):
                        iiidx = np.random.choice(pos_idx,
                                                 select_num,
                                                 False,
                                                 p=p)
                        mask[kk, iiidx] = 1.
                else:
                    fake_users = utils.generate_fake(self.extend, self.dataset)
                    mask = np.zeros((self.extend, self.num_items))
                    for kk in range(len(mask)):
                        iiidx = np.random.choice(np.arange(self.num_items),
                                                 select_num, False)
                        mask[kk, iiidx] = 1.
                # Keep only the selected items, snap the ratings to multiples
                # of 1 / max_rate and clip them to [0, 1].
                fake_users *= mask
                fake_users = np.clip(
                    np.round(fake_users * self.dataset.max_rate) /
                    self.dataset.max_rate, 0, 1)
                dataset = utils.estimate_dataset(self.dataset, fake_users)
                # Mean non-zero fake rating, then non-zero ratings per user.
                print(np.sum(fake_users) / np.sum(fake_users != 0))
                print(np.sum(fake_users != 0) / len(fake_users))
                samples = utils.sampling(dataset, 0)

            # One optimisation step per mini-batch.
            for users, items, rates in utils.get_batchs(
                    samples, FLAGS.batch_size):
                feed_dict = {
                    self.users_holder: users,
                    self.items_holder: items,
                    self.ratings_holder: rates
                }
                self.sess.run([self.train_op], feed_dict)

            # The evaluation cadence comes from FLAGS.per_epochs, which is
            # separate from the per-dataset regeneration interval above.
            if (cur_epochs % FLAGS.per_epochs == 0
                    or cur_epochs == nb_epochs - 1):
                if FLAGS.dataset == 'yelp' and cur_epochs != nb_epochs - 1:
                    rate = self.sess.run(self.rate_partial)
                else:
                    rate = self.sess.run(self.rate)
                hr, ndcg = utils.train_evalute(rate, dataset, cur_epochs)
                # utils.save_model(ckpt_save_path, saver_ckpt, self.sess)
            else:
                print("cur epochs", cur_epochs)
        return hr, ndcg
示例#12
0
    def fit(self, *args, **kwargs):
        """Forward all arguments to ``self.model.fit`` and return its result."""
        delegate = self.model.fit
        return delegate(*args, **kwargs)


if __name__ == "__main__":
    # Script entry point: fit a supervised model on the stacked feature
    # matrices and compare its predictions with the held-out targets.
    # NOTE(review): this fragment ends mid-loop; `diff`, `record` and
    # `inferred` are not consumed in the visible part.
    ydata = np.load("appliances.npy")
    print(np.max(ydata))
    print(np.min(ydata))
    xdata2 = np.load("lights.npy")
    xdata = np.load("X_data.npy")

    # Append the columns of lights.npy to the base features.
    xdata = np.hstack((xdata, xdata2))

    for _ in range(1):  # single run
        # `sampling` returns the validation indices first, then the
        # training indices.
        validation, training = sampling(size=len(xdata), validation_size=1000)
        test_y = ydata[validation, :]
        test_x = xdata[validation, :]

        train_x = xdata[training, :]
        train_y = ydata[training, :]

        # The model is sized by the number of feature columns.
        model = SupervisedLearningModel(xdata.shape[1])

        diff = []
        model.model.fit(train_x, train_y, epochs=50, batch_size=16)
        record = np.zeros((1000, 3))
        for i in range(1000):
            original_one = test_y[i][0]
            # expand_dims(..., 1).T turns the 1-D sample into a single-row
            # batch; the first element of the flattened output is kept.
            inferred = model(np.expand_dims(test_x[i], 1).T).flatten()[0]
            original_one = to_appliance(original_one)
示例#13
0
        return output_log, state_log


if __name__ == "__main__":
    # Script entry point: build a data loader over (noised, clean) samples
    # and construct an LSTM trainer. The training call itself is currently
    # commented out.

    import numpy as np
    import torch.optim as optim
    from torch.utils.data import Dataset, DataLoader
    from utils import sampling, DataGenerator

    args = parameters.get_config()
    # Use the first CUDA device when args.cuda is set, otherwise the CPU.
    device = torch.device("cuda:0" if args.cuda else "cpu")
    #data = sampling(50, 100, "sin")
    #data2 = sampling(50, 100, "cos", phase=0.5*np.pi)
    #data = np.concatenate([data, data2], 0)
    # With noise=True, `sampling` returns the clean data and its noised copy.
    data, noised_data = sampling(100, 100, "sin", noise=True)
    # DataGenerator receives the noised data first and the clean data second.
    generator = DataGenerator(noised_data, data)
    inputs = DataLoader(generator, batch_size=args.batch_size)
    trainer = Trainer(args)
    #trainer.build_model(RNN)
    trainer.build_model(LSTM)
    #trainer.train(inputs)

    # Disabled roll-out experiment (the fragment is truncated below).
    #rnn = trainer.model
    #rnn.eval()
    #output_log = []
    #for i in range(99):
    #    if i == 0:
    #        #cur_input = torch.zeros([1,2])
    #        cur_input = torch.zeros([1, 2])
    #        cur_input.data.numpy()[0, 0] = -0.75
示例#14
0
# Load the three captures in label order (0, 1, 2), tag every row with its
# label and keep only the Time / Length / label columns as NumPy arrays.
_capture_files = (
    "dataset/Telegram_1hour_7.csv",
    "dataset/Zoom_1hour_5.csv",
    "dataset/YouTube_1hour_2.csv",
)
_labelled = []
for _label_id, _csv_path in enumerate(_capture_files):
    df = pd.read_csv(_csv_path)
    df.insert(2, "label", _label_id)
    _labelled.append(df[["Time", "Length", "label"]].to_numpy())
df_0, df_1, df_2 = _labelled

# Draw the working set from the three labelled arrays.
df_set = sampling(w=args.window_size,
                  fix_num=args.fix_num,
                  df_0=df_0,
                  df_1=df_1,
                  df_2=df_2)

# MERGE modes 0, 7 and 9 use the raw dataset wrapper; every other mode uses
# the wrapper that also takes the window, FFT and statistics settings.
if args.MERGE in (0, 7, 9):
    df_set = Dataset_raw(df_set, MERGE=args.MERGE)
else:
    df_set = Dataset(df_set,
                     window_size=args.window_size,
                     fft_num=args.fft,
                     stat=args.stat,
                     MERGE=args.MERGE)

kfold = KFold(n_splits=args.k_folds, shuffle=True)
result_eval_dict = {"hyper_params": hyper_params}