def manual_main():
    csv_file_name = Constants.generate_file_name(
        'topic_model_analysis', 'csv', Constants.RESULTS_FOLDER, None, None,
        False)
    json_file_name = Constants.generate_file_name(
        'topic_model_analysis', 'json', Constants.RESULTS_FOLDER, None, None,
        False)
    print(json_file_name)
    print(csv_file_name)

    num_topics_list = [Constants.TOPIC_MODEL_NUM_TOPICS]
    num_cycles = len(num_topics_list)
    cycle_index = 1
    for num_topics in num_topics_list:
        print('\ncycle_index: %d/%d' % (cycle_index, num_cycles))
        new_dict = {
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics,
        }
        print(new_dict)
        Constants.update_properties(new_dict)
        results = Constants.get_properties_copy()
        results.update(analyze_topics(include_stability=False))
        write_results_to_csv(csv_file_name, results)
        write_results_to_json(json_file_name, results)
        cycle_index += 1
def cli_main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t', '--numtopics', metavar='int', type=int, nargs=1,
        help='The number of topics of the topic model')

    args = parser.parse_args()
    num_topics = args.numtopics[0] if args.numtopics is not None else None

    if num_topics is not None:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics})

    results = Constants.get_properties_copy()
    results.update(analyze_topics(include_stability=True))

    csv_file_name = Constants.generate_file_name(
        'topic_model_analysis', 'csv', Constants.RESULTS_FOLDER, None, None,
        False)
    json_file_name = Constants.generate_file_name(
        'topic_model_analysis', 'json', Constants.RESULTS_FOLDER, None, None,
        False)
    write_results_to_csv(csv_file_name, results)
    write_results_to_json(json_file_name, results)
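# Hedged usage sketch: cli_main is presumably the module's entry point, so a
# typical invocation would look like the following (the script name is
# illustrative, not confirmed by the source):
#
#     python topic_model_analysis.py --numtopics 10
#
if __name__ == '__main__':
    cli_main()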
def exp_ae_visual_features():
    exp_name = 'ae_visual_features'
    out_base_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/' +
        'normalized_resnet_features_recon_loss_trained_on_google')
    exp_const = ExpConstants(exp_name, out_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.batch_size = 10000
    exp_const.lr = 1e-2
    exp_const.num_epochs = 1000

    feature_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/' +
        'normalized_resnet_features_recon_loss_trained_on_google')
    data_const = VisualFeaturesDatasetConstants(feature_dir)

    model_const = Constants()
    model_const.encoder = EncoderConstants()
    model_const.encoder.output_dims = 300
    model_const.decoder = DecoderConstants()
    model_const.decoder.input_dims = 300

    train_ae_visual.main(exp_const, data_const, model_const)
def calculate_topic_stability(records):

    Constants.update_properties({
        Constants.NUMPY_RANDOM_SEED_FIELD: Constants.NUMPY_RANDOM_SEED + 10,
        Constants.RANDOM_SEED_FIELD: Constants.RANDOM_SEED + 10
    })
    utilities.plant_seeds()
    Constants.print_properties()

    if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
        num_records = len(records)
        # Integer division keeps the slice index an int on Python 3
        records = records[:num_records // 2]
    print('num_reviews', len(records))

    all_term_rankings = []

    context_extractor = \
        topic_model_creator.create_topic_model(records, None, None)
    terms_matrix = get_topic_model_terms(
        context_extractor, Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)
    all_term_rankings.append(terms_matrix)

    sample_ratio = 0.8
    print('Total iterations: %d' % Constants.TOPIC_MODEL_STABILITY_ITERATIONS)
    for _ in range(Constants.TOPIC_MODEL_STABILITY_ITERATIONS - 1):
        sampled_records = sample_list(records, sample_ratio)
        context_extractor = \
            topic_model_creator.train_context_extractor(sampled_records)
        terms_matrix = get_topic_model_terms(
            context_extractor, Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)
        all_term_rankings.append(terms_matrix)

    return calculate_stability(all_term_rankings)
def exp_train():
    # Template experiment: replace the EXP_NAME / EXP_GROUP placeholders and
    # the DATASET_CONSTANTS / NET_CONSTANTS classes with concrete values.
    exp_name = 'EXP_NAME'
    out_base_dir = os.path.join(os.getcwd(), 'symlinks/exp/EXP_GROUP')
    exp_const = ExpConstants(exp_name, out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.vis_dir = os.path.join(exp_const.exp_dir, 'vis')
    exp_const.log_step = 10
    exp_const.model_save_step = 1000
    exp_const.val_step = 1000
    exp_const.num_val_samples = 1000
    exp_const.batch_size = 32
    exp_const.num_epochs = 1000
    exp_const.lr = 0.01
    exp_const.momentum = 0.9
    exp_const.num_workers = 5
    exp_const.optimizer = 'SGD'
    exp_const.subset = {'training': 'train', 'validation': 'val'}

    data_const = DATASET_CONSTANTS()

    model_const = Constants()
    model_const.model_num = None
    model_const.net = NET_CONSTANTS()
    model_const.net_path = os.path.join(
        exp_const.model_dir, f'net_{model_const.model_num}')

    train.main(exp_const, data_const, model_const)
def count_frequencies(self):
    """
    Counts the number of reviews each user and item have and stores the
    results in two separate files, one for the users and another one for
    the items. Note that the integer IDs are used and not the original
    user and item IDs.
    """
    print('%s: count frequencies' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    user_frequency_map = ETLUtils.count_frequency(
        self.records, Constants.USER_INTEGER_ID_FIELD)
    item_frequency_map = ETLUtils.count_frequency(
        self.records, Constants.ITEM_INTEGER_ID_FIELD)

    user_frequency_file = Constants.generate_file_name(
        'user_frequency_map', 'json', Constants.CACHE_FOLDER, None, None,
        False)
    item_frequency_file = Constants.generate_file_name(
        'item_frequency_map', 'json', Constants.CACHE_FOLDER, None, None,
        False)

    ETLUtils.save_json_file(user_frequency_file, [user_frequency_map])
    ETLUtils.save_json_file(item_frequency_file, [item_frequency_map])
def exp_combine_glove_and_visual_features_with_ae():
    exp_name = 'ae_glove_and_visual'
    out_base_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/' +
        'normalized_resnet_embeddings_recon_loss_trained_on_google')
    exp_const = ExpConstants(exp_name, out_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.batch_size = 10000
    exp_const.lr = 1e-2
    exp_const.num_epochs = 1000

    concat_embeddings_dir = os.path.join(
        os.getcwd(),
        'symlinks/exp/google_images/' +
        'normalized_resnet_embeddings_recon_loss_trained_on_google/' +
        'concat_glove_and_visual')
    data_const = ConcatEmbedDatasetConstants(concat_embeddings_dir)
    data_const.embeddings_h5py = os.path.join(
        data_const.concat_dir, 'subset_visual_word_vecs.h5py')
    data_const.word_to_idx_json = os.path.join(
        data_const.concat_dir, 'subset_visual_word_vecs_idx.json')

    model_const = Constants()
    model_const.encoder = EncoderConstants()
    model_const.decoder = DecoderConstants()

    train_ae.main(exp_const, data_const, model_const)
def create_topic_models():
    my_list = range(2, 61)

    for i in my_list:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: i})
        reviews_preprocessor = ReviewsPreprocessor(use_cache=True)
        reviews_preprocessor.full_cycle()
def full_cycle():
    num_topics_list = [5, 10, 20, 40]
    # bow_type_list = [None, 'NN', 'JJ', 'VB']
    review_type_list = ['specific', 'generic']
    # num_topics_list = [10]
    bow_type_list = ['NN']

    results = []
    for num_topics, bow_type, review_type in itertools.product(
            num_topics_list, bow_type_list, review_type_list):
        Constants.update_properties({
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics,
            Constants.BOW_TYPE_FIELD: bow_type,
            Constants.TOPIC_MODEL_TARGET_REVIEWS_FIELD: review_type
        })
        result = analyze_topics()
        result.update({
            Constants.BOW_TYPE_FIELD: bow_type,
            Constants.TOPIC_MODEL_TARGET_REVIEWS_FIELD: review_type
        })
        results.append(result)

    for result in results:
        print(result)

    prefix = Constants.RESULTS_FOLDER + Constants.ITEM_TYPE + \
        '_topic_model_context_richness'
    csv_file_path = prefix + '.csv'
    json_file_path = prefix + '.json'
    headers = sorted(results[0].keys())
    # Persist the collected results; without these calls the file paths
    # above were never used (same save pattern as the divergence test)
    ETLUtils.save_csv_file(csv_file_path, results, headers)
    ETLUtils.save_json_file(json_file_path, results)
def run_recommender(args):

    import sys
    # sys.path.append('/Users/fpena/UCC/Thesis/projects/yelp/source/python')
    sys.path.append('/home/fpena/yelp/source/python')
    from utils.constants import Constants
    from evaluation.context_top_n_runner import ContextTopNRunner
    print('\n\n************************\n************************\n')
    print('args', args)

    # Cast integer values
    args[Constants.FM_ITERATIONS_FIELD] = \
        int(args[Constants.FM_ITERATIONS_FIELD])
    args[Constants.FM_NUM_FACTORS_FIELD] = \
        int(args[Constants.FM_NUM_FACTORS_FIELD])
    if args[Constants.USE_CONTEXT_FIELD]:
        args[Constants.TOPIC_MODEL_ITERATIONS_FIELD] = \
            int(args[Constants.TOPIC_MODEL_ITERATIONS_FIELD])
        args[Constants.TOPIC_MODEL_PASSES_FIELD] = \
            int(args[Constants.TOPIC_MODEL_PASSES_FIELD])
        args[Constants.TOPIC_MODEL_NUM_TOPICS_FIELD] = \
            int(args[Constants.TOPIC_MODEL_NUM_TOPICS_FIELD])

    Constants.update_properties(args)
    # Finish updating parameters

    my_context_top_n_runner = ContextTopNRunner()
    results = my_context_top_n_runner.run()
    results['loss'] = -results[Constants.EVALUATION_METRIC]
    results['status'] = 'ok'

    print('loss', results['loss'])
    return results
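# The {'loss': ..., 'status': 'ok'} return value above matches the objective
# contract of hyperopt's fmin, which is presumably what drives
# run_recommender. A minimal sketch under that assumption; the search space
# below is illustrative, not the project's actual one:
def tune_recommender_sketch():
    from hyperopt import fmin, hp, tpe
    from utils.constants import Constants

    space = {
        Constants.FM_ITERATIONS_FIELD:
            hp.quniform('fm_iterations', 50, 500, 50),
        Constants.FM_NUM_FACTORS_FIELD:
            hp.quniform('fm_num_factors', 1, 20, 1),
        # Keep the context branch off so the topic model fields are optional
        Constants.USE_CONTEXT_FIELD: False,
    }
    # run_recommender casts the sampled floats to int before using them
    return fmin(
        fn=run_recommender, space=space, algo=tpe.suggest, max_evals=50)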
def main(**kwargs):
    exp_base_dir = coco_paths['exp_dir']
    if kwargs['dataset'] == 'flickr':
        exp_base_dir = flickr_paths['exp_dir']
    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.seed = 0
    exp_const.contextualize = not kwargs['no_context']
    exp_const.random_lang = kwargs['random_lang']

    data_const = FlickrDatasetConstants(kwargs['subset'])

    model_const = Constants()
    model_const.model_num = kwargs['model_num']
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.context_layer.output_attentions = True
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()
    model_const.cap_encoder.output_attentions = True
    model_const.cap_info_nce_layers = kwargs['cap_info_nce_layers']
    if model_const.model_num == -100:
        # -100 selects the model that performed best on the validation set
        filename = os.path.join(exp_const.exp_dir, 'results_val_best.json')
        results = io.load_json_object(filename)
        model_const.model_num = results['model_num']
        print('Selected model num:', model_const.model_num)
    model_const.object_encoder_path = os.path.join(
        exp_const.model_dir, f'object_encoder_{model_const.model_num}')
    model_const.lang_sup_criterion_path = os.path.join(
        exp_const.model_dir, f'lang_sup_criterion_{model_const.model_num}')
    if exp_const.random_lang is True:
        model_const.cap_encoder_path = os.path.join(
            exp_const.model_dir, f'cap_encoder_{model_const.model_num}')

    eval_flickr_phrase_loc.main(exp_const, data_const, model_const)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--cycle', metavar='int', type=int, nargs=1,
        help='The index of the running cycle')
    parser.add_argument(
        '-f', '--fold', metavar='int', type=int, nargs=1,
        help='The index of the cross validation fold')
    parser.add_argument(
        '-t', '--numtopics', metavar='int', type=int, nargs=1,
        help='The number of topics of the topic model')

    args = parser.parse_args()
    fold = args.fold[0] if args.fold is not None else None
    cycle = args.cycle[0] if args.cycle is not None else None
    num_topics = args.numtopics[0] if args.numtopics is not None else None

    if num_topics is not None:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics})

    if fold is None and cycle is None:
        records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)

        if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
            num_records = len(records)
            # Integer division keeps the slice index an int on Python 3
            records = records[:num_records // 2]
        print('num_reviews', len(records))

        create_topic_model(records, None, None)
    else:
        create_single_topic_model(cycle, fold)
def preprocess_data():
    my_list = range(2, 61)

    for i in my_list:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: i})
        reviews_preprocessor = ReviewsPreprocessor(use_cache=True)
        reviews_preprocessor.full_cycle()
def dataset_bucket_analysis_by_field(field):
    # Set the dataset
    hotel_dataset_properties = {
        Constants.BUSINESS_TYPE_FIELD: 'fourcity_hotel'}
    Constants.update_properties(hotel_dataset_properties)

    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    print('Loaded %d records' % len(records))

    user_frequency_map = {}

    for record in records:
        user_id = record[field]
        if user_id not in user_frequency_map:
            user_frequency_map[user_id] = 0
        user_frequency_map[user_id] += 1

    print('There are a total of %d %ss' % (len(user_frequency_map), field))
    sorted_x = sorted(
        user_frequency_map.items(), key=operator.itemgetter(1), reverse=True)
    print(sorted_x[0])
    print(sorted_x[1])
    print(sorted_x[2])
    # print(user_frequency_map)

    # Number of reviews per user
    rda = ReviewsDatasetAnalyzer(records)
    users_summary = rda.summarize_reviews_by_field(field)
    print('Average number of reviews per %s: %f' %
          (field, float(rda.num_reviews) / rda.num_users))
    users_summary.plot(kind='line', rot=0)

    pandas.set_option('display.max_rows', len(users_summary))
    print(users_summary)
    pandas.reset_option('display.max_rows')
def _exp_top_boxes_per_hoi(out_base_dir, data_const):
    args = parser.parse_args()
    not_specified_args = manage_required_args(
        args,
        parser,
        required_args=['model_num'],
        optional_args=[
            'verb_given_appearance',
            'verb_given_human_appearance',
            'verb_given_object_appearance',
            'verb_given_boxes_and_object_label',
            'verb_given_human_pose',
            'rcnn_det_prob'])

    exp_name = 'factors'
    if args.rcnn_det_prob:
        exp_name += '_rcnn_det_prob'
    if args.verb_given_appearance:
        exp_name += '_appearance'
    if args.verb_given_human_appearance:
        exp_name += '_human_appearance'
    if args.verb_given_object_appearance:
        exp_name += '_object_appearance'
    if args.verb_given_boxes_and_object_label:
        exp_name += '_boxes_and_object_label'
    if args.verb_given_human_pose:
        exp_name += '_human_pose'
    exp_const = ExpConstants(
        exp_name=exp_name,
        out_base_dir=out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.num_to_vis = 10

    data_const.pred_hoi_dets_h5py = os.path.join(
        exp_const.exp_dir, f'pred_hoi_dets_test_{args.model_num}.hdf5')
    hoi_cand_dir = os.path.join(
        os.getcwd(), 'data_symlinks/hico_exp/hoi_candidates')
    data_const.human_pose_feats_hdf5 = os.path.join(
        hoi_cand_dir, 'human_pose_feats_test.hdf5')
    data_const.num_pose_keypoints = 18

    model_const = Constants()
    model_const.model_num = args.model_num
    model_const.hoi_classifier = HoiClassifierConstants()
    model_const.hoi_classifier.verb_given_appearance = \
        args.verb_given_appearance
    model_const.hoi_classifier.verb_given_boxes_and_object_label = \
        args.verb_given_boxes_and_object_label
    model_const.hoi_classifier.verb_given_human_pose = \
        args.verb_given_human_pose
    model_const.hoi_classifier.rcnn_det_prob = args.rcnn_det_prob
    model_const.hoi_classifier.model_pth = os.path.join(
        exp_const.model_dir, f'hoi_classifier_{model_const.model_num}')

    vis_top_boxes_per_hoi.main(exp_const, data_const, model_const)
def full_cycle(self):
    Constants.print_properties()
    print('%s: full cycle' % time.strftime("%Y/%m/%d-%H:%M:%S"))
    utilities.plant_seeds()

    if self.use_cache and \
            os.path.exists(Constants.PROCESSED_RECORDS_FILE):
        print('Records have already been processed')
        self.records = \
            ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    else:
        self.load_records()

        if 'yelp' in Constants.ITEM_TYPE:
            self.transform_yelp_records()
        elif 'fourcity' in Constants.ITEM_TYPE:
            self.transform_fourcity_records()

        self.add_integer_ids()
        self.clean_reviews()
        self.remove_duplicate_reviews()
        self.tag_reviews_language()
        self.remove_foreign_reviews()
        self.lemmatize_records()
        self.remove_users_with_low_reviews()
        self.remove_items_with_low_reviews()
        self.count_frequencies()
        self.shuffle_records()
        print('total_records: %d' % len(self.records))
        self.classify_reviews()
        self.build_bag_of_words()
        self.tag_contextual_reviews()
        # self.load_full_records()
        self.build_dictionary()
        self.build_corpus()
        self.label_review_targets()
        self.export_records()
        self.count_specific_generic_ratio()
        # self.export_to_triplet()

    rda = ReviewsDatasetAnalyzer(self.records)
    print('density: %f' % rda.calculate_density_approx())
    print('sparsity: %f' % rda.calculate_sparsity_approx())
    print('total_records: %d' % len(self.records))

    user_ids = \
        extractor.get_groupby_list(self.records, Constants.USER_ID_FIELD)
    item_ids = \
        extractor.get_groupby_list(self.records, Constants.ITEM_ID_FIELD)
    print('total users', len(user_ids))
    print('total items', len(item_ids))

    if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
        self.separate_recsys_topic_model_records()
def cycle_eval_topic_model(metric, num_topics_list):
    csv_file_name = Constants.generate_file_name(
        metric, 'csv', Constants.RESULTS_FOLDER, None, None, False)

    for topic in num_topics_list:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: topic})
        results = run_eval_topic_model(metric)
        topic_model_analyzer.write_results_to_csv(csv_file_name, results)
def main(**kwargs):
    exp_base_dir = coco_paths['exp_dir']
    if kwargs['dataset'] == 'flickr':
        exp_base_dir = flickr_paths['exp_dir']
    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'logs')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.vis_dir = os.path.join(exp_const.exp_dir, 'vis')
    exp_const.dataset = kwargs['dataset']
    exp_const.optimizer = 'Adam'
    exp_const.lr = kwargs['lr']
    exp_const.momentum = None
    exp_const.num_epochs = 10
    exp_const.log_step = 20
    # Save models approx. twice every epoch (e.g. 4000 = 400000 / (2*50))
    exp_const.model_save_step = 400000 // (2 * kwargs['train_batch_size'])
    if exp_const.dataset == 'flickr':
        exp_const.model_save_step = \
            150000 // (2 * kwargs['train_batch_size'])
    val_freq_factor = 2
    if kwargs['val_frequently'] is True:
        val_freq_factor = 1
    # Set to 1 * model_save_step for plotting mi vs perf
    exp_const.val_step = val_freq_factor * exp_const.model_save_step
    exp_const.num_val_samples = None
    exp_const.train_batch_size = kwargs['train_batch_size']
    exp_const.val_batch_size = 20
    exp_const.num_workers = 10
    exp_const.seed = 0
    exp_const.neg_noun_loss_wt = kwargs['neg_noun_loss_wt']
    exp_const.self_sup_loss_wt = kwargs['self_sup_loss_wt']
    exp_const.lang_sup_loss_wt = kwargs['lang_sup_loss_wt']
    exp_const.contextualize = not kwargs['no_context']
    exp_const.random_lang = kwargs['random_lang']

    DatasetConstants = CocoDatasetConstants
    if exp_const.dataset == 'flickr':
        DatasetConstants = FlickrDatasetConstants

    data_const = {
        'train': DatasetConstants('train'),
        'val': DatasetConstants('val'),
    }

    model_const = Constants()
    model_const.model_num = kwargs['model_num']
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.context_layer.output_attentions = True
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()
    model_const.cap_encoder.output_attentions = True
    model_const.cap_info_nce_layers = kwargs['cap_info_nce_layers']
    model_const.object_encoder_path = os.path.join(
        exp_const.model_dir, f'object_encoder_{model_const.model_num}')
    model_const.self_sup_criterion_path = os.path.join(
        exp_const.model_dir, f'self_sup_criterion_{model_const.model_num}')
    model_const.lang_sup_criterion_path = os.path.join(
        exp_const.model_dir, f'lang_sup_criterion_{model_const.model_num}')

    train(exp_const, data_const, model_const)
def _exp_eval(out_base_dir, data_const):
    args = parser.parse_args()
    not_specified_args = manage_required_args(
        args,
        parser,
        required_args=['model_num'],
        optional_args=[
            'verb_given_appearance',
            'verb_given_human_appearance',
            'verb_given_object_appearance',
            'verb_given_boxes_and_object_label',
            'verb_given_human_pose',
            'rcnn_det_prob'])

    exp_name = 'factors'
    if args.rcnn_det_prob:
        exp_name += '_rcnn_det_prob'
    if args.verb_given_appearance:
        exp_name += '_appearance'
    if args.verb_given_human_appearance:
        exp_name += '_human_appearance'
    if args.verb_given_object_appearance:
        exp_name += '_object_appearance'
    if args.verb_given_boxes_and_object_label:
        exp_name += '_boxes_and_object_label'
    if args.verb_given_human_pose:
        exp_name += '_human_pose'
    exp_const = ExpConstants(
        exp_name=exp_name,
        out_base_dir=out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')

    data_const.balanced_sampling = False

    model_const = Constants()
    model_const.model_num = args.model_num
    model_const.hoi_classifier = HoiClassifierConstants()
    model_const.hoi_classifier.verb_given_appearance = \
        args.verb_given_appearance
    model_const.hoi_classifier.verb_given_human_appearance = \
        args.verb_given_human_appearance
    model_const.hoi_classifier.verb_given_object_appearance = \
        args.verb_given_object_appearance
    model_const.hoi_classifier.verb_given_boxes_and_object_label = \
        args.verb_given_boxes_and_object_label
    model_const.hoi_classifier.verb_given_human_pose = \
        args.verb_given_human_pose
    model_const.hoi_classifier.rcnn_det_prob = args.rcnn_det_prob
    model_const.hoi_classifier.model_pth = os.path.join(
        exp_const.model_dir, f'hoi_classifier_{model_const.model_num}')

    if isinstance(data_const, FeatureConstantsVcoco):
        data_sign = 'vcoco'
    else:
        data_sign = 'hico'
    evaluate.main(exp_const, data_const, model_const, data_sign)
def exp_concat_random_with_glove():
    exp_name = 'concat_with_glove_100'  # alt. xformed_
    out_base_dir = os.path.join(
        os.getcwd(), 'symlinks/exp/multi_sense_cooccur/linear_100')
    exp_const = ExpConstants(exp_name, out_base_dir)
    exp_const.random_dim = 100

    data_const = Constants()
    glove_const = GloveConstantsFactory.create(dim='100')
    data_const.glove_idx = glove_const.word_to_idx_json
    data_const.glove_h5py = glove_const.embeddings_h5py

    concat_random_with_glove.main(exp_const, data_const)
def run_single_fold(self, parameters):
    fold = parameters['fold']

    Constants.update_properties(parameters)
    Constants.print_properties()
    utilities.plant_seeds()
    self.load()

    records = self.original_records
    # self.plant_seeds()
    total_cycle_time = 0.0
    num_folds = Constants.CROSS_VALIDATION_NUM_FOLDS
    split = 1 - (1 / float(num_folds))

    self.records = copy.deepcopy(records)
    if Constants.SHUFFLE_DATA:
        self.shuffle(self.records)

    fold_start = time.time()
    cv_start = float(fold) / num_folds
    print('\nFold: %d/%d' % ((fold + 1), num_folds))

    self.create_tmp_file_names(0, fold)
    self.train_records, self.test_records = \
        ETLUtils.split_train_test_copy(
            self.records, split=split, start=cv_start)
    # subsample_size = int(len(self.train_records)*0.5)
    # self.train_records = self.train_records[:subsample_size]
    self.get_records_to_predict(True)
    if Constants.USE_CONTEXT:
        if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
            self.load_cache_context_topics(None, None)
        else:
            context_extractor = self.train_topic_model(0, fold)
            self.find_reviews_topics(context_extractor, 0, fold)
    else:
        self.context_rich_topics = []
    self.predict()
    metrics = self.evaluate()
    fold_end = time.time()
    fold_time = fold_end - fold_start
    total_cycle_time += fold_time
    self.clear()
    print("Total fold %d time = %f seconds" % ((fold + 1), fold_time))

    return metrics
def exp_extract_embeddings():
    args = parser.parse_args()
    not_specified_args = manage_required_args(
        args,
        parser,
        required_args=[
            'embed_dim',
            'xform',
            'model_num',
            'syn'])

    exp_name = f'{args.xform}_{args.embed_dim}'
    out_base_dir = os.path.join(
        os.getcwd(), 'symlinks/exp/multi_sense_cooccur')
    exp_const = ExpConstants(exp_name, out_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.cooccur_types = [
        'syn',
        'attr_attr',
        'obj_attr',
        'obj_hyp',
        'context'
    ]
    if not args.syn:
        exp_const.cooccur_types = exp_const.cooccur_types[1:]

    data_const = MultiSenseCooccurDatasetConstants()
    data_const.cooccur_csv = os.path.join(
        os.getcwd(),
        'symlinks/exp/multi_sense_cooccur/cooccurrences/merged_cooccur.csv')

    model_const = Constants()
    model_const.model_num = args.model_num
    model_const.net = LogBilinearConstants()
    model_const.net.num_words = 93553
    model_const.net.embed_dims = args.embed_dim
    model_const.net.two_embedding_layers = False
    model_const.net.xform_type = args.xform
    model_const.net.xform_num_layers = None
    model_const.net.use_bias = True
    model_const.net.use_fx = False
    model_const.net.cooccur_types = copy.deepcopy(exp_const.cooccur_types)
    model_const.net_path = os.path.join(
        exp_const.model_dir, f'net_{model_const.model_num}')

    extract_embeddings.main(exp_const, data_const, model_const)
    extract_embeddings_xformed.main(exp_const, data_const, model_const)
def _exp_train(out_base_dir, data_const_train, data_const_val,
               data_sign='hico'):
    args = parser.parse_args()
    not_specified_args = manage_required_args(
        args,
        parser,
        required_args=['imgs_per_batch', 'fp_to_tp_ratio'],
        optional_args=[
            'verb_given_appearance',
            'verb_given_human_appearance',
            'verb_given_object_appearance',
            'verb_given_boxes_and_object_label',
            'verb_given_human_pose',
            'rcnn_det_prob'
        ])

    exp_name = 'factors'
    if args.rcnn_det_prob:
        exp_name += '_rcnn_det_prob'
    if args.verb_given_appearance:
        exp_name += '_appearance'
    if args.verb_given_human_appearance:
        exp_name += '_human_appearance'
    if args.verb_given_object_appearance:
        exp_name += '_object_appearance'
    if args.verb_given_boxes_and_object_label:
        exp_name += '_boxes_and_object_label'
    if args.verb_given_human_pose:
        exp_name += '_human_pose'

    exp_const = ExpConstants(exp_name=exp_name, out_base_dir=out_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'log')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.num_epochs = 10
    exp_const.imgs_per_batch = args.imgs_per_batch
    exp_const.lr = 1e-3

    model_const = Constants()
    model_const.hoi_classifier = HoiClassifierConstants(data_sign)
    model_const.hoi_classifier.verb_given_appearance = \
        args.verb_given_appearance
    model_const.hoi_classifier.verb_given_human_appearance = \
        args.verb_given_human_appearance
    model_const.hoi_classifier.verb_given_object_appearance = \
        args.verb_given_object_appearance
    model_const.hoi_classifier.verb_given_boxes_and_object_label = \
        args.verb_given_boxes_and_object_label
    model_const.hoi_classifier.verb_given_human_pose = \
        args.verb_given_human_pose
    model_const.hoi_classifier.rcnn_det_prob = args.rcnn_det_prob

    train.main(exp_const, data_const_train, data_const_val, model_const,
               data_sign)
def run_tests():
    combined_parameters = parameter_combinator.get_combined_parameters()

    test_cycle = 1
    num_tests = len(combined_parameters)
    for properties in combined_parameters:
        Constants.update_properties(properties)
        context_top_n_runner = WordContextTopNRunner()
        print('\n\n******************\nTest %d/%d\n******************\n' %
              (test_cycle, num_tests))
        context_top_n_runner.perform_cross_validation()
        test_cycle += 1
def run_tests():
    combined_parameters = parameter_combinator.hotel_context_parameters()

    test_cycle = 1
    num_tests = len(combined_parameters)
    for properties in combined_parameters:
        Constants.update_properties(properties)
        context_top_n_runner = ContextTopNRunner()
        print('\n\n******************\nTest %d/%d\n******************\n' %
              (test_cycle, num_tests))
        context_top_n_runner.perform_cross_validation()
        test_cycle += 1
def full_cycle(self):
    Constants.print_properties()
    print('%s: full cycle' % time.strftime("%Y/%m/%d-%H:%M:%S"))
    utilities.plant_seeds()

    if self.use_cache and \
            os.path.exists(Constants.PROCESSED_RECORDS_FILE):
        print('Records have already been processed')
        self.records = \
            ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    else:
        self.preprocess()

    if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
        self.separate_recsys_topic_model_records()
def evaluate(self, settings, time_file=None):
    constants = Constants()

    if not settings.random_agent:
        # Initialize various constants for 3D reconstruction.
        net, tmp_net, init = self.get_model(settings)
        saver = tf.train.Saver()  # Tensorboard saver.

    sess = None
    if not settings.random_agent:
        config = tf.ConfigProto()
        sess = tf.Session(config=config)

    if settings.random_agent:
        agent = RandomAgent()
    else:
        sess.run(init)
        self.load_net_weights(saver, sess, settings, latest=False)
        sess.graph.finalize()
        agent = NetAgent(net, None)
        net.set_session(sess)

    if settings.carla:
        self.evaluate_method_carla(agent, constants, sess, settings)
    else:
        self.evaluate_methods_cs(agent, constants, sess, settings)
def load_pipeline():
    best_hyperparams_file_name = Constants.generate_file_name(
        'best_hyperparameters', 'json', Constants.CACHE_FOLDER, None, None,
        False)

    if not os.path.exists(best_hyperparams_file_name):
        print('Best hyperparameters file does not exist; running full cycle')
        full_cycle()

    with open(best_hyperparams_file_name, 'r') as json_file:
        file_contents = json_file.read()
        parameters = json.loads(file_contents)

    print(parameters)

    classifiers = {
        'logisticregression': LogisticRegression(),
        'svc': SVC(),
        'kneighborsclassifier': KNeighborsClassifier(),
        'decisiontreeclassifier': DecisionTreeClassifier(),
        'nusvc': NuSVC(),
        'randomforestclassifier': RandomForestClassifier()
    }
    classifier = classifiers[parameters['classifier'].lower()]
    # print(classifier)
    classifier_params = get_classifier_params(parameters)
    classifier.set_params(**classifier_params)
    print(classifier)

    resampler = sampler_factory.create_sampler(
        parameters['resampler'], Constants.DOCUMENT_CLASSIFIER_SEED)

    return Pipeline([('resampler', resampler), ('classifier', classifier)])
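# Hedged usage sketch: the returned object follows the scikit-learn estimator
# API (the 'resampler' step suggests the imbalanced-learn Pipeline is in
# use). X_train/y_train/X_test below are assumed to be the feature matrix
# and labels built elsewhere in this module:
#
# pipeline = load_pipeline()
# pipeline.fit(X_train, y_train)
# predicted_classes = pipeline.predict(X_test)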
def export_records(self):
    print('%s: exporting transformed records' %
          time.strftime("%Y/%m/%d-%H:%M:%S"))

    records_to_export = []
    desired_fields = [
        Constants.USER_INTEGER_ID_FIELD,
        Constants.ITEM_INTEGER_ID_FIELD,
        Constants.RATING_FIELD,
        Constants.CONTEXT_FIELD,
    ]

    for record in self.records:
        new_record = {field: record[field] for field in desired_fields}
        records_to_export.append(new_record)

    file_name = Constants.generate_file_name(
        'recsys_formatted_context_records', 'json', Constants.CACHE_FOLDER,
        None, None, True, True, uses_carskit=False, normalize_topics=True,
        format_context=True)
    ETLUtils.save_json_file(file_name, records_to_export)
def full_cycle(metric):
    csv_file_name = Constants.generate_file_name(
        metric, 'csv', Constants.RESULTS_FOLDER, None, None, False)
    json_file_name = Constants.generate_file_name(
        metric, 'json', Constants.RESULTS_FOLDER, None, None, False)
    print(json_file_name)
    print(csv_file_name)

    properties = Constants.get_properties_copy()
    results = evaluate_topic_model(metric)
    print(results)
    results.update(properties)

    ETLUtils.write_row_to_csv(csv_file_name, results)
    ETLUtils.write_row_to_json(json_file_name, results)
def get_topic_model_prefix(folder='', seed=None):
    prefix = 'topic_model'
    if seed is not None:
        prefix += '_seed-' + str(seed)
    # generate_file_name is called with an empty extension, so the trailing
    # separator it leaves behind is stripped with [:-1]
    return Constants.generate_file_name(
        prefix, '', folder, None, None, True, True)[:-1]
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fold', metavar='int', type=int, nargs=1,
        help='The index of the cross validation fold')

    args = parser.parse_args()
    fold = args.fold[0]

    new_properties = {
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD: fold,
        Constants.CROSS_VALIDATION_STRATEGY_FIELD: 'nested_validate'
    }
    Constants.update_properties(new_properties)
    context_top_n_runner.run_tests()
def save_results(results):
    """
    Takes the results given by the run_carskit function, extends them with
    the Constants.get_properties() dictionary and saves them to a JSON file.

    :type results: list[dict]
    :param results: the results to save
    """
    properties = Constants.get_properties_copy()
    json_file = Constants.generate_file_name(
        'carskit_results', 'json', OUTPUT_FOLDER, None, None, False)

    for result in results:
        result.update(properties)
        write_results_to_json(json_file, result)
def topic_stability_main():
    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)

    # num_topic_list = range(2, 101)
    num_topic_list = [2, 5]
    results = {}
    for num_topics in num_topic_list:
        new_properties = {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics}
        Constants.update_properties(new_properties)
        results[num_topics] = calculate_topic_stability(records)

    print('Results:')
    for num_topics in num_topic_list:
        scores = results[num_topics]
        print('%d: %.4f [%.4f,%.4f]' %
              (num_topics, numpy.nanmean(scores), numpy.nanmin(scores),
               numpy.nanmax(scores)))
def load_topic_model(cycle_index, fold_index):
    file_path = \
        Constants.generate_file_name(
            'topic_model', 'pkl', Constants.CACHE_FOLDER, cycle_index,
            fold_index, True)
    print(file_path)
    with open(file_path, 'rb') as read_file:
        topic_model = pickle.load(read_file)
    return topic_model
def create_all_term_rankings(records, metric):
    print('%s: creating all term rankings' %
          time.strftime("%Y/%m/%d-%H:%M:%S"))

    all_term_rankings = []
    # context_extractor =\
    #     topic_model_creator.create_topic_model(records, None, None)
    # terms_matrix = get_topic_model_terms(
    #     context_extractor, Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)
    # all_term_rankings.append(terms_matrix)

    context_extractor = \
        topic_model_creator.train_context_extractor(records, False)
    terms_matrix = get_topic_model_terms(
        context_extractor, Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)
    all_term_rankings.append(terms_matrix)

    sample_ratio = Constants.TOPIC_MODEL_STABILITY_SAMPLE_RATIO

    if metric in [TERM_STABILITY_PAIRWISE, TERM_DIFFERENCE]:
        sample_ratio = None
        Constants.update_properties(
            {Constants.TOPIC_MODEL_STABILITY_SAMPLE_RATIO_FIELD:
                sample_ratio})
        msg = 'Warning: Since the metric is \'%s\' I have updated the ' \
              'topic_model_stability_sample_ratio value to None' % metric
        print(msg)

    num_iterations = Constants.TOPIC_MODEL_STABILITY_ITERATIONS
    for i in range(num_iterations - 1):
        print('Iteration %d/%d' % (i + 1, num_iterations))
        print('sample_ratio:', sample_ratio)

        if sample_ratio is None:
            sampled_records = records
        else:
            sampled_records = sample_list(records, sample_ratio)
        context_extractor = \
            topic_model_creator.train_context_extractor(
                sampled_records, False)
        terms_matrix = get_topic_model_terms(
            context_extractor, Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)
        all_term_rankings.append(terms_matrix)

    return all_term_rankings
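# sample_list is assumed to draw a uniform subsample of the records without
# replacement. A minimal sketch of that helper (hypothetical, shown here
# commented out so it does not shadow the project's real utility):
#
# import random
#
# def sample_list(records, sample_ratio):
#     sample_size = int(len(records) * sample_ratio)
#     return random.sample(records, sample_size)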
def test():
    document_term_matrix = NmfTopicExtractor.load_document_term_matrix()

    results = []

    # my_list = range(2, 31)
    my_list = range(2, 61)

    for i in my_list:
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: i})

        topic_model = NmfTopicExtractor()
        topic_model.load_trained_data()

        document_topic_matrix = topic_model.document_topic_matrix
        topic_term_matrix = topic_model.topic_term_matrix

        divergence = calculate_divergence(
            document_term_matrix, document_topic_matrix, topic_term_matrix)

        result = {
            'num_topics': Constants.TOPIC_MODEL_NUM_TOPICS,
            'divergence': divergence,
            Constants.TOPIC_MODEL_TYPE_FIELD: 'ensemble',
            Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE
        }
        results.append(result)

        print('Num topics: %d, Divergence: %f' %
              (Constants.TOPIC_MODEL_NUM_TOPICS, divergence))

    for result in results:
        print('%d %f' % (result['num_topics'], result['divergence']))

    prefix = Constants.RESULTS_FOLDER + Constants.ITEM_TYPE + \
        '_topic_model_divergence'
    csv_file_path = prefix + '.csv'
    json_file_path = prefix + '.json'
    headers = sorted(results[0].keys())
    ETLUtils.save_csv_file(csv_file_path, results, headers)
    ETLUtils.save_json_file(json_file_path, results)
def run_eval_topic_model(metric):
    parse_directory_command = Constants.TOPIC_ENSEMBLE_FOLDER + \
        'eval-' + metric.replace('_', '-') + '.py'
    csv_file = Constants.generate_file_name(
        metric, 'csv', BASE_FOLDER, None, None, True, True)
    dataset_file_name = Constants.generate_file_name(
        'topic_model', '', BASE_FOLDER, None, None, True, True)[:-1] + \
        '/ranks*.pkl'
    topic_model_files = glob.glob(dataset_file_name)

    command = [
        PYTHON_COMMAND,
        parse_directory_command,
    ]
    command.extend(topic_model_files)
    command.extend([
        '-o', csv_file
    ])
    print(' '.join(command))

    unique_id = uuid.uuid4().hex
    log_file_name = Constants.GENERATED_FOLDER + Constants.ITEM_TYPE + '_' + \
        Constants.TOPIC_MODEL_TARGET_REVIEWS + '_' + metric + '_' + \
        unique_id + '.log'
    # The log file must be opened before being passed as stdout below; this
    # line was commented out, which left log_file undefined
    log_file = open(log_file_name, "w")
    p = subprocess.Popen(
        command, stdout=log_file, cwd=Constants.TOPIC_ENSEMBLE_FOLDER)
    p.wait()

    results = read_csv_first_column_as_key(csv_file, metric)
    results[Constants.TOPIC_MODEL_NUM_TOPICS_FIELD] = \
        Constants.TOPIC_MODEL_NUM_TOPICS
    results[Constants.TOPIC_MODEL_TYPE_FIELD] = Constants.TOPIC_MODEL_TYPE

    return results
def load_document_term_matrix():
    topic_model_corpus_folder = \
        Constants.CACHE_FOLDER + 'topic_models/corpus/'
    corpus_path = Constants.generate_file_name(
        'topic_ensemble_corpus', '', topic_model_corpus_folder,
        None, None, False)[:-1] + '.pkl'
    document_term_matrix, _, _, _ = load_corpus(corpus_path)

    print("Loaded document-term matrix of size %s" %
          str(document_term_matrix.shape))

    return document_term_matrix
def create_topic_model(num_topics):
    print('%s: creating topic model' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    Constants.update_properties({
        Constants.NUMPY_RANDOM_SEED_FIELD: Constants.NUMPY_RANDOM_SEED + 10,
        Constants.RANDOM_SEED_FIELD: Constants.RANDOM_SEED + 10,
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics
    })
    utilities.plant_seeds()
    Constants.print_properties()

    file_path = Constants.ENSEMBLED_RESULTS_FOLDER + \
        "factors_final_k%02d.pkl" % Constants.TOPIC_MODEL_NUM_TOPICS

    if os.path.exists(file_path):
        print('Ensemble topic model already exists')
        return

    # topic_ensemble_caller.run_local_parse_directory()
    topic_ensemble_caller.run_generate_kfold()
    topic_ensemble_caller.run_combine_nmf()
def create_single_topic_model(cycle_index, fold_index, check_exists=True):
    Constants.print_properties()
    print('%s: Start' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
        msg = 'This function shouldn\'t be used when the ' \
              'separate_topic_model_recsys_reviews property is set to True'
        raise ValueError(msg)

    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    if Constants.CROSS_VALIDATION_STRATEGY == 'nested_test':
        pass
    elif Constants.CROSS_VALIDATION_STRATEGY == 'nested_validate':
        num_folds = Constants.CROSS_VALIDATION_NUM_FOLDS
        cycle = Constants.NESTED_CROSS_VALIDATION_CYCLE
        split = 1 - (1 / float(num_folds))
        cv_start = float(cycle) / num_folds
        print('cv_start', cv_start)
        records, _ = ETLUtils.split_train_test(records, split, cv_start)
    else:
        raise ValueError('Unknown cross-validation strategy')

    utilities.plant_seeds()
    num_folds = Constants.CROSS_VALIDATION_NUM_FOLDS
    split = 1 - (1 / float(num_folds))

    # Reshuffle once per cycle so the record order matches the state of the
    # requested cycle
    for i in range(cycle_index + 1):
        if Constants.SHUFFLE_DATA:
            random.shuffle(records)

    cv_start = float(fold_index) / num_folds
    train_records, test_records = \
        ETLUtils.split_train_test(records, split=split, start=cv_start)
    return create_topic_model(
        train_records, cycle_index, fold_index, check_exists)
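# Worked example of the split arithmetic above, assuming the default of
# num_folds = 5: split = 1 - 1/5 = 0.8, and fold_index = 2 gives
# cv_start = 2/5 = 0.4, so the 20% of the records starting at the 40% mark
# become the test set and the remaining 80% the training set.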
def create_topic_model_with_context_records():
    processed_records_file = Constants.generate_file_name(
        'classified_processed_reviews', 'json', Constants.CACHE_FOLDER,
        None, None, False, True)
    records = ETLUtils.load_json_file(processed_records_file)
    print('records length: %d' % len(records))

    context_records = ETLUtils.filter_records(
        records, 'context_type', ['context'])
    print('context records length: %d' % len(context_records))
    context_specific_records = ETLUtils.filter_records(
        context_records, 'predicted_class', ['specific'])
    print('context specific records length: %d' %
          len(context_specific_records))

    for i in range(len(context_specific_records)):
        # print('%d:\t%s' % (i, context_records[i]['text']))
        print('%d:\t%s' % (i, context_specific_records[i]['bow']))

    for i in range(1, len(context_records) + 1):
        Constants.update_properties(
            {Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: i})
        context_extractor = \
            topic_model_creator.create_topic_model(records, None, None)

        topic_data = []

        for topic in range(Constants.TOPIC_MODEL_NUM_TOPICS):
            result = {}
            result['topic_id'] = topic
            result.update(split_topic(context_extractor.print_topic_model(
                num_terms=Constants.TOPIC_MODEL_STABILITY_NUM_TERMS)[topic]))
            result['ratio'] = context_extractor.topic_ratio_map[topic]
            result['weighted_frequency'] = \
                context_extractor.topic_weighted_frequency_map[topic]
            topic_data.append(result)

        file_name = Constants.generate_file_name(
            'manual_topic_model', 'xlsx', Constants.DATASET_FOLDER, None,
            None, True)
        generate_excel_file(topic_data, file_name)
def analyze_results():
    json_file = Constants.generate_file_name(
        'carskit_results', 'json', OUTPUT_FOLDER, None, None, False)
    records = ETLUtils.load_json_file(json_file)

    data_frame = pandas.DataFrame(records)
    print(sorted(list(data_frame.columns.values)))
    cols = [
        'ck_rec10',
        'ck_pre10',
        'ck_algorithm',
        'carskit_nominal_format',
        'topic_model_num_topics',
        'topic_model_normalize']
    data_frame = data_frame[cols]
    data_frame = data_frame.sort_values(['ck_rec10'])
    print(data_frame)
    data_frame.to_csv(
        '/Users/fpena/tmp/' + Constants.ITEM_TYPE + '_carskit.csv')
def evaluate_topic_model(metric):
    print('%s: evaluating topic model' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    Constants.update_properties({
        Constants.NUMPY_RANDOM_SEED_FIELD: Constants.NUMPY_RANDOM_SEED + 10,
        Constants.RANDOM_SEED_FIELD: Constants.RANDOM_SEED + 10
    })
    utilities.plant_seeds()
    Constants.print_properties()

    records = ETLUtils.load_json_file(Constants.PROCESSED_RECORDS_FILE)
    if Constants.SEPARATE_TOPIC_MODEL_RECSYS_REVIEWS:
        num_records = len(records)
        # Integer division keeps the slice index an int on Python 3
        records = records[:num_records // 2]
    print('num_reviews', len(records))

    all_term_rankings = None
    topic_model_type = Constants.TOPIC_MODEL_TYPE
    if topic_model_type in ['lda', 'nmf']:
        all_term_rankings = create_all_term_rankings(records, metric)
    elif topic_model_type == 'ensemble':
        all_term_rankings = create_all_term_rankings_from_ensemble()
    else:
        raise ValueError(
            'Unrecognized topic modeling algorithm: \'%s\'' %
            topic_model_type)
    print('Total iterations: %d' % len(all_term_rankings))

    if metric == TERM_STABILITY_REFERENCE:
        return eval_term_stability_reference(all_term_rankings)
    if metric == TERM_STABILITY_PAIRWISE:
        return eval_term_stability_pairwise(all_term_rankings)
    elif metric == TERM_DIFFERENCE:
        return eval_term_difference(all_term_rankings)
    else:
        raise ValueError('Unknown evaluation metric: \'%s\'' % metric)
def run_recommender(args):

    import sys
    # sys.path.append('/Users/fpena/UCC/Thesis/projects/yelp/source/python')
    sys.path.append('/home/fpena/yelp/source/python')
    from utils.constants import Constants
    from topicmodeling.context import topic_model_analyzer
    print('\n\n************************\n************************\n')
    print('args', args)

    parameters = {
        Constants.BUSINESS_TYPE_FIELD: args[Constants.BUSINESS_TYPE_FIELD],
        # 'lda_alpha': args['lda_alpha'],
        # 'lda_beta': args['lda_beta'],
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD:
            args[Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD],
        Constants.TOPIC_MODEL_ITERATIONS_FIELD:
            int(args[Constants.TOPIC_MODEL_ITERATIONS_FIELD]),
        Constants.TOPIC_MODEL_PASSES_FIELD:
            int(args[Constants.TOPIC_MODEL_PASSES_FIELD]),
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD:
            int(args[Constants.TOPIC_MODEL_NUM_TOPICS_FIELD]),
        # 'topic_weighting_method': args['topic_weighting_method'],
        Constants.USE_CONTEXT_FIELD: args[Constants.USE_CONTEXT_FIELD]
    }

    Constants.update_properties(parameters)
    # Finish updating parameters

    results = topic_model_analyzer.export_topics()
    results['loss'] = -results['combined_score']
    results['status'] = 'ok'

    print('loss', results['loss'])
    return results
def get_topic_ensemble_ranks_file_paths():
    num_models = Constants.TOPIC_MODEL_STABILITY_ITERATIONS
    random_seeds = range(1, num_models + 1)
    suffix = 'ranks_ensemble_k%02d.pkl' % Constants.TOPIC_MODEL_NUM_TOPICS

    file_paths = []
    for seed in random_seeds:
        prefix = 'topic_model_seed-' + str(seed)
        topic_model_folder = Constants.generate_file_name(
            prefix, '', Constants.ENSEMBLE_FOLDER, None, None, True,
            True)[:-1]
        topic_model_file = topic_model_folder + '/' + suffix
        file_paths.append(topic_model_file)

    return file_paths
def create_topic_model(records, cycle_index, fold_index, check_exists=True):
    print('%s: Create topic model' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    topic_model_file_path = \
        Constants.generate_file_name(
            'topic_model', 'pkl', Constants.CACHE_FOLDER, cycle_index,
            fold_index, True)
    print(topic_model_file_path)

    if check_exists and os.path.exists(topic_model_file_path):
        print('WARNING: Topic model already exists')
        return load_topic_model(cycle_index, fold_index)

    topic_model = train_context_extractor(records)

    with open(topic_model_file_path, 'wb') as write_file:
        pickle.dump(topic_model, write_file, pickle.HIGHEST_PROTOCOL)

    return topic_model
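# Hedged usage sketch: create_topic_model and load_topic_model share the
# same Constants-generated pickle path, so a model trained for cycle 0 /
# fold 0 can be read straight back (records is assumed to be the list of
# preprocessed review dictionaries):
#
# topic_model = create_topic_model(records, 0, 0)
# same_model = load_topic_model(0, 0)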