Example No. 1
def plot_measure_of_group(group: List[int], method: str,
                          model: models.ScoreModel, title: str,
                          measure_array: np.ndarray, axis_label: Tuple[str, str],
                          dest_path_tail: str):
    # Plot each selected measure for every user in the group on one figure.
    dest = share.PLOT_TOP + '/measure_of_group/' + model.get_dir_name(
    ) + '/' + dest_path_tail
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])
    for user_id in group:
        source = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                      model.get_dir_name(),
                                      method=method,
                                      user_id=user_id)
        data = pd.read_csv(source)
        xs = [x * 0.1 for x in range(measure_array.shape[0])]  # one x position per measure
        ys = []  # this user's value for each measure
        for measure in measure_array:
            ys.append(data[measure].values[0])
        if measure_array.shape[0] == 1:
            pyplot.scatter(xs, ys, label='user' + str(user_id))
        else:
            pyplot.plot(xs, ys, label='user' + str(user_id))
    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
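A minimal call sketch for plot_measure_of_group, assuming the surrounding module's share/util/models context; the model instance, the 'bayes' method key, the user ids, and the destination file name are hypothetical placeholders, not values from this project.

# Hypothetical usage sketch: some_score_model, the 'bayes' method key, the user
# ids and the file name below are placeholders, not values from this project.
group = [1, 2, 3]
ndcg_measures = share.MEASURE_TYPE_MEASURE_DICT['nDCG']  # one entry per cutoff
plot_measure_of_group(group=group,
                      method='bayes',
                      model=some_score_model,
                      title='nDCG per user',
                      measure_array=ndcg_measures,
                      axis_label=('measure', 'nDCG'),
                      dest_path_tail='ndcg_per_user.png')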
Example No. 2
def plot_measure_of_compare(compare_dict: Dict[str, Tuple[str,
                                                          models.ScoreModel]],
                            title: str, group: List[int],
                            measure_array: np.ndarray,
                            axis_label: Tuple[str, str], dest_path_tail: str):
    # Plot the group-averaged measures for every (method, model) entry in compare_dict.
    dest = share.PLOT_TOP + '/measure_of_compare/' + dest_path_tail
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])
    for key, (method, compare) in compare_dict.items():
        xs = [x * 0.1 for x in range(measure_array.shape[0])]  # one x position per measure
        ys = []
        for measure in measure_array:
            value = .0
            for user_id in group:
                source = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                              compare.get_dir_name(),
                                              method=method,
                                              user_id=user_id)
                data = pd.read_csv(source)
                value += data[measure].values[0] / len(group)
            ys.append(value)
        if measure_array.shape[0] == 1:
            # a single-valued measure (e.g. MAiP) is drawn as a point
            pyplot.scatter(xs, ys, label=key)
        else:
            pyplot.plot(xs, ys, label=key)

    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
Example No. 3
def set_exam_tex_form(group: List[int],
                      compare_dict: Dict[str, Tuple[str, models.ScoreModel]],
                      title: str):
    # compare_dict = {'key': (method, model), ...}; use an OrderedDict so the
    # columns keep a stable order.
    # One file per measure, with columns: user_id,<key1>,<key2>,...
    dest_head = share.TEX_EXAM_SOURCE_TOP + '/' + title
    splitter = ','
    keys = compare_dict.keys()
    for measure_type in share.MEASURE_TYPE_LIST:
        for measure in share.MEASURE_TYPE_MEASURE_DICT[measure_type]:
            dest = dest_head + '/' + measure
            header = 'user_id'
            for key in keys:
                header += splitter + key
            util.init_file(dest)
            with open(dest, 'wt') as fout:
                fout.write(header + '\n')
                for user_id in group:
                    line = str(user_id)
                    for key in keys:
                        line += splitter
                        method, score_model = compare_dict[key]
                        source = util.get_result_path(
                            dir_name=share.RESULT_TOP + '/' +
                            score_model.get_dir_name(),
                            user_id=user_id,
                            method=method)
                        try:
                            data = pd.read_csv(source)
                            line += str(
                                round(data[measure].values[0], share.DIGIT))
                        except FileNotFoundError:
                            # leave the cell empty when a result file is missing
                            print(source + ' not found' + '\n')
                    fout.write(line + '\n')
Example No. 4
def get_measure_tex_form(group: List[int],
                         compare_dict: Dict[str, Tuple[str,
                                                       models.ScoreModel]],
                         title: str):
    # compare_dict must be an OrderedDict so the table columns keep their order.
    dest = share.TEX_MEASURE_TOP + '/' + title
    splitter = '&'
    measure_suffix = '\\\\\n'  # LaTeX row terminator
    measure_type_suffix = '\\\\ \\hline\n'  # row terminator plus a rule after each measure type
    header = 'measure'
    group_size = len(group)
    keys = compare_dict.keys()
    for key in keys:
        header += splitter + key
    header += measure_suffix
    util.init_file(dest)
    with open(dest, 'wt') as fout:
        fout.write(header)
        for measure_type in share.MEASURE_TYPE_LIST:
            measure_array = share.MEASURE_TYPE_MEASURE_DICT[measure_type]
            for i, measure in enumerate(measure_array):
                line = measure
                for key in keys:
                    value = 0.0
                    line += splitter
                    for user_id in group:
                        method, model = compare_dict[key]
                        source = util.get_result_path(
                            dir_name=share.RESULT_TOP + '/' +
                            model.get_dir_name(),
                            user_id=user_id,
                            method=method)
                        try:
                            data = pd.read_csv(source)
                            value += data[measure].values[0] / group_size
                        except FileNotFoundError:
                            print(source + ' not found\n')
                            break
                    line += str(round(value, share.DIGIT))
                if i + 1 == measure_array.shape[0]:
                    line += measure_type_suffix
                else:
                    line += measure_suffix
                fout.write(line)
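For illustration only, a tiny standalone sketch of the row format that get_measure_tex_form writes; the column keys ('modelA', 'modelB'), the measure name, and the numbers are made up.

# Standalone sketch of the emitted LaTeX table rows; all names and values are invented.
splitter = '&'
measure_suffix = '\\\\\n'  # ends each LaTeX table row with \\
header = 'measure' + splitter + 'modelA' + splitter + 'modelB' + measure_suffix
row = 'nDCG@5' + splitter + '0.41' + splitter + '0.38' + measure_suffix
print(header + row)
# measure&modelA&modelB\\
# nDCG@5&0.41&0.38\\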
Example No. 5
def plot_marg_and_rank(model: models.ScoreModel, user_id: str, train_id: int,
                       method: str, boolean_value: int, title: str,
                       denominator_list: List[int], denominator_title,
                       dest_path_tail: str):
    rank_space = 10
    color_list = [
        'red', 'orange', 'yellow', 'green', 'blue', 'violet', 'black'
    ]
    _, _, mapping_id = util.get_score_mapping_param(user_id)
    all_items_marg_path = model.get_dest_dict(
    )['all_items_marg_dict'] + '/' + mapping_id
    #all marg data
    with open(all_items_marg_path, 'rb') as fin:
        all_marg_dict = pickle.load(fin)

    user_input_path = model.get_dest_dict(
    )['log_weight_and_score_model_list'] + '/' + util.get_user_train_id_path(
        user_id=user_id, train_id=train_id)
    #user marg data
    with open(user_input_path, 'rb') as fin:
        user_weight_marg_dict_list = pickle.load(fin)

    #ranking data
    rank_input_path = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                           model.get_dir_name(),
                                           method=method,
                                           user_id=user_id,
                                           train_id=train_id)

    rank_data = pd.read_csv(rank_input_path, index_col='id')
    # bucket the selected items into ranking bands of rank_space and count how
    # many fall into the denominator categories
    denominator_size = 0
    ranking_list = []

    bool_data = rank_data[rank_data['boolean'] == boolean_value]
    size = bool_data.shape[0]

    for _ in color_list:
        ranking_list.append([])
    for hotel_id, row in bool_data.iterrows():
        i = int(row['ranking'] / rank_space)
        ranking_list[i].append(hotel_id)
        if row['boolean'] in denominator_list:
            denominator_size += 1

    for score_type in share.DEFAULT_SCORE_TYPE_LIST:
        dest = util.get_result_path(
            dir_name=share.PLOT_TOP + '/marg_and_boolean/' +
            model.get_dir_name() + '/' + dest_path_tail,
            method=method,
            user_id=user_id,
            train_id=train_id) + '/' + score_type
        try:
            all_pdf = all_marg_dict[score_type].pdf
            user_pdf = util.get_pdf_from_weight_marg_dict_list(
                weight_marg_dict_list=user_weight_marg_dict_list,
                score_type=score_type)

            xs = list(
                np.arange(SCORE_SPACE_DICT[score_type][1],
                          SCORE_SPACE_DICT[score_type][0], 0.01))
            ys = [user_pdf(x) for x in xs]
            xs_all = xs
            ys_all = [all_pdf(x) for x in xs_all]
            for i, color in enumerate(color_list):
                bool_xs = [
                    bool_data.loc[index][score_type]
                    for index in ranking_list[i]
                ]
                bool_ys = [user_pdf(x) for x in bool_xs]
                pyplot.scatter(bool_xs,
                               bool_ys,
                               label='top' + str((i + 1) * rank_space),
                               color=color)
            pyplot.plot(xs_all, ys_all, label='all_items')
            pyplot.plot(xs, ys, label='user')
            pyplot.title('pdf and ' + title + ' ' + str(size) + 'items/' +
                         denominator_title + str(denominator_size) + 'items ' +
                         'for ' + score_type)
            pyplot.xlabel('score')
            pyplot.ylabel('pdf')
            pyplot.xticks(
                np.arange(SCORE_SPACE_DICT[score_type][1],
                          SCORE_SPACE_DICT[score_type][0], 0.1))
            pyplot.legend()
            util.init_file(dest)
            pyplot.savefig(dest)
            pyplot.figure()

        except KeyError:
            # this score_type was removed by kl_reduced, so there is nothing to plot
            pass
Example No. 6
def do_measure(model: models.ScoreModel, group: List[int]):
    model_remapping = model.get_remapping()
    for user_id in group:
        print('###########################################')
        print('user = ' + str(user_id))
        print('###########################################')
        respective_method_measures_dict = {}
        user_k_folded_path = share.TRAIN_DATA_TOP + '/user' + str(
            user_id) + '_kfolded.json'
        with open(user_k_folded_path, 'rt') as fin:  #load train_and_test_data
            kfolded_training_and_test_data_list = json.load(fin)

        if model_remapping:
            remapping, score_mapping_dict, mapping_id = util.get_score_mapping_param(
                user_id)
        else:  # this model does not remap scores for group users
            remapping = False
        if remapping:  # the user's mapping differs from the default one
            # deep-copy so that the score conversion does not modify the shared items
            all_items = copy.deepcopy(share.ALL_ITEMS)
            util.convert_score(all_items, score_mapping_dict)
        else:
            all_items = share.ALL_ITEMS  # no conversion needed, reuse the shared items

        for train_id, training_and_test_data in enumerate(
                kfolded_training_and_test_data_list
        ):  # one training/testing round per fold (share.TRAIN_SIZE folds)
            training_hotel_list = training_and_test_data['trainingTrue']
            training_false_hotel_list = training_and_test_data['trainingFalse']
            test_hotel_list = training_and_test_data['testTrue']
            test_false_hotel_list = training_and_test_data['testFalse']

            model.train(
                training_data_t=pd.DataFrame.from_records(training_hotel_list),
                training_data_f=pd.DataFrame.from_records(
                    training_false_hotel_list),
                all_items=all_items,
                mapping_id=mapping_id,
                train_id=train_id,
                user_id=user_id)
            #log parameter of model.train()
            model.make_log()

            ranking_dict = model.calc_ranking(all_items=all_items)
            test_hotel_id_list = [
                test_hotel['id'] for test_hotel in test_hotel_list
            ]
            training_hotel_id_list = [
                training_hotel['id'] for training_hotel in training_hotel_list
            ]
            training_false_hotel_id_list = [
                training_false_hotel['id']
                for training_false_hotel in training_false_hotel_list
            ]

            for method, ranking in ranking_dict.items():
                ranking = ranking.drop(training_hotel_id_list)
                ranking = ranking.drop(training_false_hotel_id_list)
                print(method + '\n')
                print(ranking)
                dest = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                            model.get_dir_name(),
                                            method=method,
                                            user_id=user_id,
                                            train_id=train_id)
                util.log_ranking(all_items=all_items,
                                 ranking=ranking,
                                 path=dest,
                                 score_type_list=model.get_score_type_list(),
                                 test_id_list=test_hotel_id_list)
                # initialise the accumulators the first time this method appears
                if method not in respective_method_measures_dict:
                    temp = {}
                    for measure_type in share.MEASURE_TYPE_LIST:
                        temp[measure_type] = [
                            .0
                        ] * share.MEASURE_TYPE_MEASURE_DICT[
                            measure_type].shape[0]
                    for label_type in share.LABEL_TYPE_LIST:
                        temp[label_type] = []
                    respective_method_measures_dict[method] = temp

                ips = ip(ranking, test_hotel_id_list)
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['iP'].shape[0]):
                    respective_method_measures_dict[method]['iP'][i] += ips[i]
                respective_method_measures_dict[method]['MAiP'][0] += ips[11]
                for i in range(
                        0, share.MEASURE_TYPE_MEASURE_DICT['nDCG'].shape[0]):
                    respective_method_measures_dict[method]['nDCG'][
                        i] += n_dcg(ranking, 5 * (i + 1), test_hotel_id_list)
                for i in range(0,
                               share.MEASURE_TYPE_MEASURE_DICT['P'].shape[0]):
                    respective_method_measures_dict[method]['P'][
                        i] += precision(ranking, 5 * (i + 1),
                                        test_hotel_id_list)

                for i, label_type in enumerate(share.LABEL_TYPE_LIST):
                    respective_method_measures_dict[method][label_type].extend(
                        adhoc_task(ranking, 10 * (i + 1), test_hotel_id_list))

        for method, respective_measures in respective_method_measures_dict.items(
        ):
            file_name = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                             model.get_dir_name(),
                                             method=method,
                                             user_id=user_id)
            util.init_file(file_name)
            with open(file_name, 'wt') as fout:
                header = 'file,user'
                line = file_name + ',user' + str(user_id)
                for measure_type in share.MEASURE_TYPE_LIST:
                    for item, measure in enumerate(
                            share.MEASURE_TYPE_MEASURE_DICT[measure_type]):
                        header += ',' + measure
                        line += ',' + str(
                            respective_measures[measure_type][item] /
                            share.TRAIN_SIZE)
                header += '\n'
                line += '\n'
                fout.write(header + line)

            for label_type in share.LABEL_TYPE_LIST:
                label_file_name = util.get_result_path(
                    dir_name=share.LABEL_TOP + '/' + label_type + '/' +
                    model.get_dir_name(),
                    method=method,
                    user_id=user_id)
                adhoc_testing_task(label_file_name,
                                   respective_measures[label_type])
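For reference, a standalone sketch of the per-user result CSV that do_measure writes (one header row, one data row) and that the plotting and TeX helpers above read back with pd.read_csv; the path, measure names, and numbers are invented for illustration.

# Standalone sketch of the result file layout written at the end of do_measure.
# The path, measure names and values are invented for illustration only.
header = 'file,user' + ',nDCG@5' + ',nDCG@10' + '\n'
line = 'result/some_model/some_method/user1,user1,0.41,0.44' + '\n'
print(header + line)
# file,user,nDCG@5,nDCG@10
# result/some_model/some_method/user1,user1,0.41,0.44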