示例#1
0
METRICS = ['mrr']

MODELS = [
    # Only item sets
    #Countbased(),
    #SVDRecommender(1000, use_title=False),
    #AAERecommender(adversarial=True, use_title=False, n_epochs=55, embedding=VECTORS),
    #AAERecommender(adversarial=False, n_epochs=1),
    #VAERecommender(conditions=None, n_epochs=55, batch_size=1000),
    #DAERecommender(conditions=None, n_epochs=55, batch_size=1000),
    # Title-enhanced
    #SVDRecommender(1000, use_title=True),
    #AAERecommender(adversarial=True, use_side_info=True, n_epochs=55, embedding=VECTORS),
    #AAERecommender(adversarial=False, use_side_info=["name"], n_epochs=5, embedding=VECTORS),
    #DecodingRecommender(n_epochs=55, embedding=VECTORS)
    VAERecommender(conditions=CONDITIONS, n_epochs=55, batch_size=1000),
    DAERecommender(conditions=CONDITIONS, n_epochs=55, batch_size=1000),
    # Generic condition all
    #AAERecommender(adversarial=False, conditions=CONDITIONS, n_epochs=55),
    #AAERecommender(adversarial=True, conditions=CONDITIONS, n_epochs=55),
    #DecodingRecommender(conditions=CONDITIONS, n_epochs=55)
    # Put more here...
]


def load(path):
    """ Loads a single slice """
    with open(path, 'r') as fhandle:
        obj = json.load(fhandle)
    return obj["playlists"]
示例#2
0
    'n_hidden': 100,
    'normalize_inputs': True,
}

# Models without metadata
BASELINES = [
    # RandomBaseline(),
    # MostPopular(),
    Countbased(),
    SVDRecommender(1000, use_title=False),
]

RECOMMENDERS = [
    AAERecommender(adversarial=False, lr=0.001, **ae_params),
    AAERecommender(prior='gauss', gen_lr=0.001, reg_lr=0.001, **ae_params),
    VAERecommender(conditions=None, **vae_params),
    DAERecommender(conditions=None, **ae_params)
]

# Metadata to use
CONDITIONS = ConditionList([
    ('title', PretrainedWordEmbeddingCondition(VECTORS)),
    #    ('author', CategoricalCondition(embedding_dim=32, reduce="sum",
    #                                    sparse=True, embedding_on_gpu=True))
])

# Model with metadata (metadata used as set in CONDITIONS above)
CONDITIONED_MODELS = [
    # TODO SVD can use only titles not generic conditions
    SVDRecommender(1000, use_title=True),
    AAERecommender(adversarial=False,
示例#3
0
def main(year,
         dataset,
         min_count=None,
         outfile=None,
         drop=1,
         baselines=False,
         autoencoders=False,
         conditioned_autoencoders=False,
         all_metadata=True):
    """ Main function for training and evaluating AAE methods on DBLP data """

    assert baselines or autoencoders or conditioned_autoencoders, "Please specify what to run"

    if all_metadata:
        # V2 - all metadata
        CONDITIONS = ConditionList([
            ('title', PretrainedWordEmbeddingCondition(VECTORS)),
            ('venue', PretrainedWordEmbeddingCondition(VECTORS)),
            (
                'author',
                CategoricalCondition(
                    embedding_dim=32,
                    reduce="sum",  # vocab_size=0.01,
                    sparse=False,
                    embedding_on_gpu=True))
        ])
    else:
        # V1 - only title metadata
        CONDITIONS = ConditionList([
            ('title', PretrainedWordEmbeddingCondition(VECTORS))
        ])
    #### CONDITOINS defined

    ALL_MODELS = []

    if baselines:
        # Models without metadata
        BASELINES = [
            # RandomBaseline(),
            # MostPopular(),
            Countbased(),
            SVDRecommender(1000, use_title=False)
        ]

        ALL_MODELS += BASELINES

        if not all_metadata:
            # SVD can use only titles not generic conditions
            ALL_MODELS += [SVDRecommender(1000, use_title=True)]

    if autoencoders:
        AUTOENCODERS = [
            AAERecommender(adversarial=False,
                           conditions=None,
                           lr=0.001,
                           **AE_PARAMS),
            AAERecommender(adversarial=True,
                           conditions=None,
                           gen_lr=0.001,
                           reg_lr=0.001,
                           **AE_PARAMS),
            VAERecommender(conditions=None, **AE_PARAMS),
            DAERecommender(conditions=None, **AE_PARAMS)
        ]
        ALL_MODELS += AUTOENCODERS

    if conditioned_autoencoders:
        # Model with metadata (metadata used as set in CONDITIONS above)
        CONDITIONED_AUTOENCODERS = [
            AAERecommender(adversarial=False,
                           conditions=CONDITIONS,
                           lr=0.001,
                           **AE_PARAMS),
            AAERecommender(adversarial=True,
                           conditions=CONDITIONS,
                           gen_lr=0.001,
                           reg_lr=0.001,
                           **AE_PARAMS),
            DecodingRecommender(CONDITIONS,
                                n_epochs=100,
                                batch_size=1000,
                                optimizer='adam',
                                n_hidden=100,
                                lr=0.001,
                                verbose=True),
            VAERecommender(conditions=CONDITIONS, **AE_PARAMS),
            DAERecommender(conditions=CONDITIONS, **AE_PARAMS)
        ]
        ALL_MODELS += CONDITIONED_AUTOENCODERS

    print("Finished preparing models:", *ALL_MODELS, sep='\n\t')

    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)
    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset:", dataset)
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp,
                                 conditions=None,
                                 include_labels=True,
                                 normalize=True)
        with open('mi.csv', 'a') as mifile:
            print(dataset, min_count, mi, sep=',', file=mifile)

        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)
    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles + Author + Venue", "~" * 42, file=fh)
    evaluation(ALL_MODELS, batch_size=1000)