def generate_condition(self):
    """Pick a random entry from the role list that matches ``self.role``.

    Returns:
        One element of ``self.UseNative_Roles`` when the role equals
        ``Condition.UseNative``, or one element of ``self.UseData_Roles``
        when it equals ``Condition.UseData``; the element is selected by
        ``RandomData.random_choice``.

    Raises:
        Exception: with the message "role error" when ``self.role`` matches
            neither of the two roles.
    """
    current_role = self.role

    # Guard clauses, checked in the same order as before: native first.
    if current_role == Condition.UseNative:
        return RandomData.random_choice(self.UseNative_Roles)

    if current_role == Condition.UseData:
        return RandomData.random_choice(self.UseData_Roles)

    raise Exception("role error")
def test_kd_tree(n, D, n_test, alphas):
    """
    Measures KD-tree query time and distance on random data.

    A random data set is generated and inserted into a KDTree, then for every
    alpha each point of a second random set is looked up with
    KDTree.nearest and the distance to the returned document is accumulated.

    @param n: int - the number of points of the dataset
    @param D: int - the dimension of the data points
    @param n_test: int - the number of points to test
    @param alphas: [float] - the alpha values passed to KDTree.nearest
    @return [TestResult] one TestResult per alpha holding the elapsed time and
        the accumulated distance, each divided by n_test
    """
    dataset = RandomData.random_dataset(n, D)
    queries = RandomData.random_dataset(n_test, D)

    # Index every generated document under its coordinates along each of the
    # D dimensions.
    tree = KDTree(D)
    for doc_key, doc in dataset.iteritems():
        tree.insert([doc.get(dim) for dim in xrange(D)], doc_key)
    print("Finished making random tree.")

    results = []
    for alpha in alphas:
        print("Running for alpha %s" % (alpha,))
        # The clock starts before the bookkeeping below, so the measured
        # interval covers everything up to the end of the query loop.
        started = time.clock()
        total_distance = 0.0
        print("Setting up test documents or whatever this is..")
        for _, query in queries.iteritems():
            coords = [query.get(dim) for dim in xrange(D)]
            nearest_id = tree.nearest(coords, alpha)
            total_distance += EvalUtil.distance(query, dataset[nearest_id])
        print("Finished.")
        elapsed = time.clock() - started

        results.append(
            TestResult("KDTree", n, D, alpha,
                       elapsed / n_test, total_distance / n_test))
    return results
def test_lsh(n, D, n_test, alphas):
    """
    Tests the query time and distance of locality-sensitive hashing for a
    random data set and test set

    For every value in `alphas` a LocalitySensitiveHash is built over the
    random data set (construction is not timed), then each test point is
    queried with nearest_neighbor and the distance to the returned document
    is accumulated.

    @param n: int - the number of points of the dataset
    @param D: int - the dimension of the data points
    @param n_test: int - the number of points to test
    @param alphas: the values to sweep; each one is passed as the third
        argument of LocalitySensitiveHash (reported as the number of
        projections) and recorded in the resulting TestResult
    @return [TestResult] array of objects of class TestResult, which has the
        average time and distance for a single query
    """
    documents = RandomData.random_dataset(n, D)
    test_documents = RandomData.random_dataset(n_test, D)

    times = []
    # Sweep the caller-supplied values. The loop used to read a free name
    # `ms` that this function never defines, leaving `alphas` unused.
    for m in alphas:
        lsh = LocalitySensitiveHash(documents, D, m)
        print("Finished making locally sensitive hash.")
        print("Running for %s projections..." % (m,))

        start_time = time.clock()
        cum_dist = 0.0
        print("Running for the test documents...")
        for _, test_document in test_documents.iteritems():
            # No per-query key list is built here: the LSH lookup takes the
            # document itself, and building an unused list inside the timed
            # region only inflated the reported query time.
            # NOTE(review): the literal 3 is passed through unchanged; its
            # meaning is defined by LocalitySensitiveHash.nearest_neighbor.
            doc = lsh.nearest_neighbor(test_document, 3)
            cum_dist += EvalUtil.distance(test_document,
                                          documents[doc.doc_id])
        print("Finished.")
        duration = time.clock() - start_time

        times.append(
            TestResult("LSH", n, D, m, duration / n_test, cum_dist / n_test))
    return times
def choice():
    """Return one entry of ``ConditionConnection.Kinds`` picked by
    ``RandomData.random_choice``."""
    candidates = ConditionConnection.Kinds
    return RandomData.random_choice(candidates)
def index_value(index=None, *args):
    """Build a "pick the N-th item" prompt over ``args`` and its answer.

    Args:
        index: zero-based position of the item to pick. When ``None`` a
            position is drawn with ``RandomData.random_choice`` from
            ``range(len(args))``.
        *args: the candidate items.

    Returns:
        A ``(description, answer)`` tuple: the description lists every item
        joined by '、' followed by the one-based position, and the answer is
        ``args[index]``.
    """
    if index is None:
        positions = range(len(args))
        index = RandomData.random_choice(positions)

    # Look the item up first so an out-of-range index fails before any text
    # is formatted.
    picked = args[index]

    listing = '、'.join(str(item) for item in args)
    description = '%s里第%s个内容' % (listing, index + 1)
    return description, picked
def __init__(self, role):
    """Populate the candidate lists for both roles and store the role.

    ``self.UseNative_Roles`` holds (label, value) pairs built from the
    current date helpers of ``RandomData``, one fixed pair, and the result
    of ``Calc.index_value`` over four random letter/digit values.
    ``self.UseData_Roles`` holds the results of five ``Calc`` helpers, each
    applied to four fresh ``RandomData.get_int()`` values.

    Args:
        role: the requested role. When it equals ``self.Any`` one of
            ``self.UseData`` / ``self.UseNative`` is chosen at random
            instead.
    """
    def draw_four(producer):
        # Call `producer` four times, keeping the results in call order.
        return [producer() for _ in range(4)]

    native_roles = [
        ("今年年份", RandomData.current_year()),
        ("本月月份", RandomData.current_month()),
        ("今天几号", RandomData.current_day()),
        ("今天周几[1-7]", RandomData.current_weekday()),
        ("北京奥运那年", "2008"),
    ]
    # index=None lets Calc.index_value choose the position itself.
    native_roles.append(
        Calc.index_value(None, *draw_four(RandomData.get_letter_digit)))
    self.UseNative_Roles = native_roles

    # Same helper order as before so the sequence of random draws is kept.
    data_roles = []
    for helper in (Calc.sum, Calc.get_even, Calc.get_odd, Calc.max, Calc.min):
        data_roles.append(helper(*draw_four(RandomData.get_int)))
    self.UseData_Roles = data_roles

    if role == self.Any:
        self.role = RandomData.random_choice([self.UseData, self.UseNative])
    else:
        self.role = role