def main(_):
    """Configure the run, load the data, build model/optimizer/loss and launch the Runner.

    The positional argument is ignored; all configuration is read from the
    module-level ``FLAGS`` object.
    """
    set_seed(FLAGS.seed, set_tf_seed=FLAGS.debug)
    setup_logger(FLAGS.print_logs, FLAGS.save_logs, Path(FLAGS.logs_dir), FLAGS.run_id)
    # Debug runs execute tf.functions eagerly.
    tf.config.experimental_run_functions_eagerly(FLAGS.debug)
    logging.info(f"Flags/config of this run:\n{get_flags_dict(FLAGS)}")

    # On multi-GPU hosts, expose only the GPU selected by FLAGS.gpu_index.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    logging.info(f"Num GPUs Available: {len(gpus)}")
    if len(gpus) > 1:
        try:
            logging.info(f"Setting GPU Index {FLAGS.gpu_index} only")
            tf.config.experimental.set_visible_devices(gpus[FLAGS.gpu_index], 'GPU')
        except RuntimeError as err:
            # Visible devices must be set before GPUs have been initialized;
            # the failure is only logged and the run continues.
            logging.info(err)

    # Load data.
    train, dev, test, samples, n_relations, train_len, data = load_data(FLAGS)
    # Only the entity count is used below; the other two quantities are discarded.
    _n_users, _n_items, n_entities = get_quantities(data)

    model = get_model(n_entities, n_relations, data["id2iid"])
    optimizer = get_optimizer(FLAGS)
    # The loss class is looked up by name in the `losses` module.
    loss_class = getattr(losses, FLAGS.loss_fn)
    loss_fn = loss_class(ini_neg_index=0, end_neg_index=n_entities - 1, args=FLAGS)
    logging.info(f"Train split size: {train_len}, relations: {n_relations}")

    splits = dict(train=train, dev=dev, test=test, samples=samples)
    id_maps = {key: data[key] for key in ("id2uid", "id2iid", "iid2name")}
    runner = Runner(FLAGS, model, optimizer, loss=loss_fn, **splits, **id_maps)
    runner.run()
    logging.info("Done!")
 def setUp(self):
     """Prepare the fixture: fixed seed, float64 Keras backend, flags and toy sizes."""
     super().setUp()
     set_seed(42, set_tf_seed=True)

     # The test case works in double precision: the Keras default float type
     # is switched and the matching dtype is kept on the instance.
     tf.keras.backend.set_floatx("float64")
     self.dtype = tf.float64

     self.flags = get_flags()

     # Sizes of the toy problem used by the tests.
     self.n_users, self.n_items, self.n_relations = 2, 2, 1
     self.item_ids = [0, 1]
示例#3
0
def main(_):
    """Build a graph from the loaded triplets, run ``seccurv`` on it and save the result.

    The positional argument is ignored. The output is written with ``np.save``
    to a file whose name is derived from ``FLAGS.prep_name`` and from whether
    all relations were loaded.
    """
    setup_logger(print_logs=True, save_logs=False, save_path="", run_id="")
    set_seed(FLAGS.seed, set_tf_seed=False)

    triplets, all_rels = load_data(FLAGS)
    graph = build_graph(triplets)
    logging.info(nx.info(graph))

    seccurv_options = dict(sample_ratio=FLAGS.sample_ratio,
                           max_neigh_pairs=FLAGS.max_neigh_pairs)
    curvatures = seccurv(graph, **seccurv_options)

    # File name: text of prep_name before the first '-', plus an all/no-relations tag.
    prep_prefix = FLAGS.prep_name.split('-')[0]
    rel_tag = 'all' if all_rels else 'no'
    out_file = f"outseccurv-{prep_prefix}-{rel_tag}rel"
    np.save(out_file, curvatures)
def _load_samples_and_names(dataset_path):
    """Load the raw interactions and the item-id -> name map for ``FLAGS.item``.

    :param dataset_path: ``Path`` of the dataset root passed to the loaders.
    :return: tuple ``(samples, iid2name)``; ``samples`` maps a user id to that
        user's item ids, ``iid2name`` maps an item id to a display name.
    :raises ValueError: if ``FLAGS.item`` names no known dataset.
    """
    if FLAGS.item == "keen":
        samples = keen.load_user_keen_interactions(
            dataset_path,
            min_user_ints=FLAGS.min_user_interactions,
            min_item_ints=FLAGS.min_item_interactions,
            max_item_ints=FLAGS.max_item_interactions)
        iid2name = keen.build_iid2title(item_id_key="keen_id",
                                        item_title_key="keen_title")
        return samples, iid2name

    if FLAGS.item == "gem":
        samples = keen.load_keen_gems_interactions(
            dataset_path,
            min_keen_keen_edges=2,
            max_keen_keen_edges=1000,
            min_overlapping_users=2,
            min_keen_ints=FLAGS.min_user_interactions,
            min_item_ints=FLAGS.min_item_interactions,
            max_item_ints=FLAGS.max_item_interactions)
        iid2name = keen.build_iid2title(item_id_key="gem_id",
                                        item_title_key="gem_link_title")
        return samples, iid2name

    if FLAGS.item == "ml-1m":
        samples = movielens.movielens_to_dict(dataset_path)
        iid2name = movielens.build_movieid2title(dataset_path)
        return samples, iid2name

    if "amazon" in FLAGS.item:
        samples = amazon.load_interactions(dataset_path / FLAGS.amazon_reviews)
        iid2name = amazon.build_itemid2name(dataset_path / FLAGS.amazon_meta)
        return samples, iid2name

    raise ValueError(f"Unknown item: {FLAGS.item}")


def _drop_most_popular_items(samples, n_most_popular):
    """Remove the ``n_most_popular`` top-ranked items from every user's interactions.

    Users left without any interaction are dropped. Each remaining list is
    rebuilt from a set, so duplicates are removed and the original order of a
    user's interactions is not preserved.

    :param samples: dict mapping a user id to an iterable of item ids.
    :param n_most_popular: how many leading entries of the popularity ranking
        to remove.
    :return: new dict with the same layout as ``samples``.
    """
    # Assumes sort_items_by_popularity yields (item_id, <something>) pairs with
    # the most popular first -- TODO confirm against its definition.
    sorted_items = sort_items_by_popularity(samples)
    iid_to_filter = {iid for iid, _ in sorted_items[:n_most_popular]}
    filtered = {
        uid: list(set(ints) - iid_to_filter)
        for uid, ints in samples.items()
    }
    return {uid: ints for uid, ints in filtered.items() if ints}


def main(_):
    """Preprocess a user-item interaction dataset and pickle the resulting splits.

    Steps: load the interactions for ``FLAGS.item``; optionally drop the most
    popular items; optionally only plot the graph and stop; map raw ids to
    sequential ids; create the splits; optionally add item-item triplets and
    extra Amazon relations; save everything under the configured prep dir.

    The positional argument is ignored; configuration comes from ``FLAGS``.

    :raises ValueError: if ``FLAGS.item`` names no known dataset.
    """
    set_seed(FLAGS.seed, set_tf_seed=True)
    dataset_path = Path(FLAGS.dataset_path)
    samples, iid2name = _load_samples_and_names(dataset_path)

    if FLAGS.filter_most_popular > 0:
        print(f"Filtering {FLAGS.filter_most_popular} most popular items")
        samples = _drop_most_popular_items(samples, FLAGS.filter_most_popular)

    if FLAGS.plot_graph:
        # Plot-only mode: nothing is preprocessed or written to disk.
        plot_graph(samples)
        return

    uid2id, iid2id = map_raw_ids_to_sequential_ids(samples)

    # For the keen/gem datasets the interactions of each user are sorted
    # before being translated to sequential ids.
    sort_interactions = FLAGS.item in ("keen", "gem")
    id_samples = {}
    for uid, ints in samples.items():
        if sort_interactions:
            ints = sorted(ints)
        id_samples[uid2id[uid]] = [iid2id[iid] for iid in ints]

    data = create_splits(id_samples,
                         Relations.USER_ITEM.value,
                         do_random=FLAGS.shuffle,
                         seed=FLAGS.seed)
    # Keyed by raw item id; items without a known name get the string "None".
    data["iid2name"] = {iid: iid2name.get(iid, "None") for iid in iid2id}
    # Inverse maps: sequential id -> raw id.
    data["id2uid"] = {v: k for k, v in uid2id.items()}
    data["id2iid"] = {v: k for k, v in iid2id.items()}
    print(f"User item interaction triplets: {len(data['train'])}")
    n_entities = len(uid2id) + len(iid2id)

    # if there is an item-item graph, we preprocess it
    if FLAGS.item_item_file:
        item_item_distances_dict = load_item_item_distances(
            dataset_path / FLAGS.item_item_file)
        item_item_triplets = build_item_item_triplets(
            item_item_distances_dict, iid2id, FLAGS.similarity_items_per_item)
        add_to_train_split(data, item_item_triplets)
        print(f"Added item-item similarity triplets: {len(item_item_triplets)}")

    if "amazon" in FLAGS.item and FLAGS.add_extra_relations:
        print("Adding extra relations")
        # load_relations returns the updated entity count; presumably it also
        # extends `data` in place -- verify against its definition.
        n_entities = amazon_relations.load_relations(
            dataset_path / FLAGS.amazon_meta, data, iid2id, n_entities)

    data["n_entities"] = n_entities
    print(f"Final training split: {len(data['train'])} triplets")

    # creates directories to save preprocessed data
    prep_path = Path(CONFIG["string"]["prep_dir"][1])
    prep_path.mkdir(parents=True, exist_ok=True)
    to_save_dir = prep_path / FLAGS.item
    to_save_dir.mkdir(parents=True, exist_ok=True)
    save_as_pickle(to_save_dir / f'{FLAGS.prep_id}.pickle', data)

    if FLAGS.export_splits:
        export_splits(data, to_save_dir, FLAGS.prep_id)

    print("Done!")
示例#5
0
 def setUp(self):
     super().setUp()
     set_seed(42, set_tf_seed=True)
     self.dtype = tf.float64
     self.c = tf.convert_to_tensor([1.0], dtype=self.dtype)