    def step1_ListCreation(self, n_rankers=3, credits=False):
        print('Testing step 1: creation of multileaved list')
        arg_str = ""
        if credits:
            arg_str = "-c True"
        multil = ml.ProbabilisticMultileave(arg_str)

        # Load the test queries and pick the first one.
        query_fh = cStringIO.StringIO(self.test_queries)
        queries = qu.Queries(query_fh, self.test_num_features)
        query = queries[queries.keys()[0]]
        query_fh.close()

        # Build identical BM25 rankers; the second argument corresponds
        # to the ranker_type.
        ranker_arg_str = ['ranker.model.BM25', '1']
        ties = "random"
        feature_count = None
        rankers = [rnk(ranker_arg_str, ties, feature_count)
                   for _ in range(n_rankers)]

        length = 10
        (createdList, _) = multil.multileave(rankers, query, length)

        foundDocs = [d.docid for d in createdList]
        existingDocs = [q.docid for q in query.get_docids()]
        # The multileaved list must draw only from the query's candidate
        # documents, have the requested length, and contain no duplicates.
        assert set(foundDocs).issubset(set(existingDocs))
        assert len(foundDocs) == length
        assert len(foundDocs) == len(set(foundDocs))  # no duplicates

        # Stored for the next test step:
        self.foundDocs = createdList
        self.rankers = rankers
        self.query = query
        self.multil = multil
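# For reference, a minimal self-contained sketch of the multileave step the
# test above exercises. This is an illustration of the general probabilistic
# multileaving idea, not the ml.ProbabilisticMultileave implementation: each
# ranker is softened into a distribution over its remaining documents via a
# softmax over rank positions (the 1/rank**tau decay and the uniform ranker
# choice per slot are assumptions here), and one document is sampled without
# replacement per result slot.
import random


def sketch_probabilistic_multileave(rankings, length, tau=3.0):
    # rankings: one ranked list of docids per ranker; all rankers are
    # assumed to rank the same candidate set.
    result = []
    remaining = set(rankings[0])
    while len(result) < length and remaining:
        ranking = random.choice(rankings)  # pick a ranker uniformly
        docs = [d for d in ranking if d in remaining]
        # P(doc at rank r) proportional to 1 / r**tau (ranks start at 1)
        weights = [1.0 / (r + 1) ** tau for r in range(len(docs))]
        pick = random.random() * sum(weights)
        for d, w in zip(docs, weights):
            pick -= w
            if pick <= 0:
                result.append(d)
                remaining.discard(d)
                break
    return result

# e.g. multileave three rankers' lists over the same five documents:
# sketch_probabilistic_multileave([[1, 2, 3, 4, 5],
#                                  [2, 1, 3, 5, 4],
#                                  [5, 4, 3, 2, 1]], length=5)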
    def __init__(self, feature_sets, n_features=64, cutoff=10,
                 click_model="navigational", experiment_type=""):
        self.experiment_type = experiment_type
        self.n_rankers = len(feature_sets)
        self.n_features = n_features
        self.cutoff = cutoff
        self.train_queries = qu.load_queries(PATH_TRAIN_QUERIES,
                                             self.n_features)
        self.test_queries = qu.load_queries(PATH_TEST_QUERIES,
                                            self.n_features)

        # Sample-based multileaving with increasing sample sizes.
        self.samplemultil100 = sbml.SampleBasedProbabilisticMultileaveAS(
            "--n_samples 100")
        self.samplemultil1000 = sbml.SampleBasedProbabilisticMultileaveAS(
            "--n_samples 1000")
        self.samplemultil10000 = sbml.SampleBasedProbabilisticMultileaveAS(
            "--n_samples 10000")
        self.samplemultil100000 = sbml.SampleBasedProbabilisticMultileaveAS(
            "--n_samples 100000")
        #self.samplemultil10 = sbml.SampleBasedProbabilisticMultileave("--n_samples 10")
        #self.samplemultil1000 = sbml.SampleBasedProbabilisticMultileave("--n_samples 100")
        #self.samplemultil10000 = sbml.SampleBasedProbabilisticMultileave("--n_samples 1000")

        self.multil = ml.ProbabilisticMultileave()
        self.multil_nonbin = ml.ProbabilisticMultileave("-c True")
        self.interl = ProbabilisticInterleave('--aggregate binary')
        self.TeamDraftMultileave = TeamDraftMultileave()

        # One ranker per feature set: each ranker weights the features in
        # its set with 1 and all other features with 0.
        self.allrankers = [rnk("1", "random", self.n_features)
                           for _ in range(self.n_rankers)]
        for feature_ids, ranker in zip(feature_sets, self.allrankers):
            weights = np.zeros(self.n_features)
            for fid in feature_ids:
                weights[fid] = 1
            ranker.update_weights(weights)

        if experiment_type == "sensitivity":
            # Ground truth: pairwise preferences derived from each ranker's
            # average nDCG over the test queries.
            ndcg = NdcgEval()
            average_ndcgs = np.zeros(self.n_rankers)
            for query in self.test_queries:
                for i, ranker in enumerate(self.allrankers):
                    ranker.init_ranking(query)
                    average_ndcgs[i] += ndcg.get_value(
                        ranker.get_ranking(), query.get_labels().tolist(),
                        None, self.cutoff)
            average_ndcgs /= len(self.test_queries)

            self.all_true_pref = np.zeros((self.n_rankers, self.n_rankers))
            for i in range(self.n_rankers):
                for j in range(self.n_rankers):
                    self.all_true_pref[i, j] = \
                        0.5 * (average_ndcgs[i] - average_ndcgs[j]) + 0.5
        elif experiment_type == "bias":
            # Random clicks: no ranker should be preferred over any other.
            click_model = "random"
            self.all_true_pref = np.full((self.n_rankers, self.n_rankers),
                                         0.5)
        else:
            raise Exception("Set experiment type")

        if click_model == "navigational":
            click_str = "--p_click 0:.05, 1:0.95 --p_stop 0:.2, 1:.5"
        elif click_model == "perfect":
            click_str = "--p_click 0:.0, 1:1. --p_stop 0:.0, 1:.0"
        elif click_model == "informational":
            click_str = "--p_click 0:.4, 1:.9 --p_stop 0:.1, 1:.5"
        elif click_model == "random":
            click_str = "--p_click 0:.5, 1:.5 --p_stop 0:.0, 1:.0"
        else:
            raise Exception("Unknown click model: %s" % click_model)
        self.user_model = CascadeUserModel(click_str)
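# The nested "sensitivity" loop above builds the ground-truth preference
# matrix P[i, j] = 0.5 * (nDCG_i - nDCG_j) + 0.5, so P[i, j] > 0.5 exactly
# when ranker i outperforms ranker j and P[i, i] == 0.5. An equivalent
# vectorized sketch (the nDCG values below are made-up examples):
import numpy as np

average_ndcgs = np.array([0.42, 0.38, 0.45])  # hypothetical per-ranker nDCG
all_true_pref = 0.5 * (average_ndcgs[:, None] - average_ndcgs[None, :]) + 0.5
# all_true_pref[0, 1] == 0.52: ranker 0 is slightly preferred over ranker 1.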