def test_matrix_from_list_as_column_vector():
    """A flat list of numbers must produce a single-column matrix."""
    column = matrix([0, 1, 2])

    # Three rows, one column -- checked through every size accessor.
    assert column.nr() == 3
    assert column.nc() == 1
    assert column.shape == (3, 1)
    assert len(column) == 3

    # Text renderings: one value per row.
    expected_repr = "< dlib.matrix containing: \n0 \n1 \n2 >"
    expected_str = "0 \n1 \n2"
    assert repr(column) == expected_repr
    assert str(column) == expected_str
def test_matrix_empty_init():
    """A matrix built without arguments must be 0x0 and render as empty."""
    empty = matrix()

    # Every size accessor is expected to report zero.
    assert empty.nr() == 0
    assert empty.nc() == 0
    assert empty.shape == (0, 0)
    assert len(empty) == 0

    # Text renderings of the empty matrix.
    expected_repr = "< dlib.matrix containing: >"
    expected_str = ""
    assert repr(empty) == expected_repr
    assert str(empty) == expected_str
def test_matrix_from_object_with_2d_shape():
    """A 2-D numpy array must convert into a matrix of the same layout."""
    source = numpy.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
    converted = matrix(source)

    # 3x3 in, 3x3 out.
    assert converted.nr() == 3
    assert converted.nc() == 3
    assert converted.shape == (3, 3)
    assert len(converted) == 3

    # Values are rendered row by row.
    expected_repr = "< dlib.matrix containing: \n0 1 2 \n3 4 5 \n6 7 8 >"
    expected_str = "0 1 2 \n3 4 5 \n6 7 8"
    assert repr(converted) == expected_repr
    assert str(converted) == expected_str
def test_matrix_set_size():
    """Resizing an empty matrix to 5x5 must give an all-zero 5x5 matrix."""
    side = 5
    grid = matrix()
    grid.set_size(5, 5)

    # Size accessors after the resize.
    assert grid.nr() == 5
    assert grid.nc() == 5
    assert grid.shape == (5, 5)
    assert len(grid) == 5

    # The resized matrix is expected to read back as zeros.
    expected_repr = "< dlib.matrix containing: \n0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0 >"
    expected_str = "0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0 \n0 0 0 0 0"
    assert repr(grid) == expected_repr
    assert str(grid) == expected_str

    # A pickle round trip (protocol 2) must preserve every element.
    restored = pickle.loads(pickle.dumps(grid, 2))
    cells = ((r, c) for r in range(side) for c in range(side))
    for r, c in cells:
        assert grid[r][c] == restored[r][c]
def test_matrix_from_list():
    """A list of equally long rows must convert into a 3x3 matrix."""
    side = 3
    built = matrix([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

    # Size accessors.
    assert built.nr() == 3
    assert built.nc() == 3
    assert built.shape == (3, 3)
    assert len(built) == 3

    # Values are rendered row by row.
    expected_repr = "< dlib.matrix containing: \n0 1 2 \n3 4 5 \n6 7 8 >"
    expected_str = "0 1 2 \n3 4 5 \n6 7 8"
    assert repr(built) == expected_repr
    assert str(built) == expected_str

    # A pickle round trip (protocol 2) must preserve every element.
    restored = pickle.loads(pickle.dumps(built, 2))
    cells = ((r, c) for r in range(side) for c in range(side))
    for r, c in cells:
        assert built[r][c] == restored[r][c]
c = a a = b b = c d = list() for i in range(len(a)): d.append(list()) for j in range(len(b)): d[-1].append(int(10 * (dist1(a[i:], b[j:]) + dist1(a[i::-1], b[j::-1])))) return d # print(distM(textA, textB)) # print(textA[0] == textB[1]) cost = dlib.matrix(distM(textA, textB)) print("cost matrix ready") before = datetime.datetime.now() assignment = dlib.max_cost_assignment(cost) after = datetime.datetime.now() print(textA) print(textB) print(ruler) # This prints optimal assignments: [2, 0, 1] # which indicates that we should assign the person from the first row of the # cost matrix to job 2, the middle row person to job 0, and the bottom row # person to job 1. print("Optimal assignments: {}".format(assignment)) print(after - before)
print "%d source / %d target docs" \ % (n_source, n_target) print datetime.now() if args.matching: print "Finding best matching" matching_pairs = set() full_matrix = np.pad( score_matrix, ((0, max(score_matrix.shape) - score_matrix.shape[0]), (0, max(score_matrix.shape) - score_matrix.shape[1])), mode='constant') cost = dlib.matrix(full_matrix) print "Searching with dlib" assignment = dlib.max_cost_assignment(cost) for sidx, tidx in enumerate(assignment): if sidx >= score_matrix.shape[0] or tidx >= score_matrix.shape[1]: continue matching_pairs.add((sidx, tidx)) print "Found %d matches " % (len(matching_pairs)) found = devset.intersection(matching_pairs) print "Found %d out of %d pairs = %f%%" \ % (len(found), len(devset), 100. * len(found) / len(devset)) print "RES:\t%s\t%s\t%d\t%d" % (args.prefix, args.matrix.name, len(found), len(devset)) else:
def test_matrix_from_object_without_shape():
    """Constructing a matrix from a plain string must raise AttributeError."""
    unsupported_source = "invalid"
    with raises(AttributeError):
        matrix(unsupported_source)
def test_matrix_from_object_without_2d_shape():
    """Constructing a matrix from a 1-D numpy array must raise IndexError."""
    with raises(IndexError):
        # The array is built inside the context manager, as in the original.
        matrix(numpy.array([0, 1, 2]))
c = a a = b b = c d = list() for i in range(len(a)): d.append(list()) for j in range(len(b)): d[-1].append(int(10 * ( dist1(a[i:], b[j:]) + dist1(a[i::-1], b[j::-1]) ))) return d #print(distM(textA, textB)) #print(textA[0] == textB[1]) cost = dlib.matrix(distM(textA, textB)) print('cost matrix ready') before = datetime.datetime.now() assignment = dlib.max_cost_assignment(cost) after = datetime.datetime.now() print(textA) print(textB) print(ruler) # This prints optimal assignments: [2, 0, 1] # which indicates that we should assign the person from the first row of the # cost matrix to job 2, the middle row person to job 0, and the bottom row # person to job 1. print("Optimal assignments: {}".format(assignment)) print(after - before)
def test_matrix_from_list_with_invalid_rows():
    """Rows of differing length must be rejected with ValueError."""
    # The middle row is one element short.
    ragged_rows = [[0, 1, 2], [3, 4], [5, 6, 7]]
    with raises(ValueError):
        matrix(ragged_rows)
print datetime.now() matches = [] if args.matching: print "Finding best matching" full_matrix = np.pad( score_matrix, ((0, max(score_matrix.shape) - score_matrix.shape[0]), (0, max(score_matrix.shape) - score_matrix.shape[1])), mode='constant') # print full_matrix.shape, np.sum(full_matrix) # print score_matrix.shape, np.sum(score_matrix) import dlib cost = dlib.matrix(full_matrix) print "Searching with dlib" assignment = dlib.max_cost_assignment(cost) # print assignment for sidx, tidx in enumerate(assignment): if sidx >= score_matrix.shape[0] or tidx >= score_matrix.shape[1]: continue matches.append((score_matrix[sidx, tidx], sidx, tidx)) matches.sort(reverse=True) matches = [(sidx, tidx) for score, sidx, tidx in matches] else: print "Finding best match (greedy / restricted + argsort)" seen_cols = set()
def match(input_matrix, allow_zero_scores=False):
    """
    Builds match list

    Negative scores are clipped to zero, the score table is zero-padded to a
    square matrix and passed to ``dlib.max_cost_assignment``; the resulting
    (row, column) pairs are mapped back to the labels of ``input_matrix``.

    :param input_matrix: 2 dimensional array of scores. Must expose ``index``
        (row labels) and ``columns`` (column labels) and be convertible with
        ``np.array`` -- presumably a pandas DataFrame; confirm with callers.
    :param allow_zero_scores: Should items with a score of 0 be considered to
        be matched? Default is False
    :return: dict with the keys ``"Matches"`` (list of
        ``(row_label, column_label)`` tuples), ``"Details"``
        (``{"Quality Scores": [...]}``, the positive scores of the assigned
        pairs), ``"Quality Score"`` (mean of those scores), ``"Precision"``
        (matches / rows), ``"Recall"`` (matches / columns), ``"F1"`` and
        ``"Mercury Score"`` (mean of quality score and F1). When the input
        has an empty dimension or no positive score, every metric is 0 and
        the lists are empty.
    """
    out_dict = {
        "Matches": [],
        "Details": {"Quality Scores": []},
        "Quality Score": 0,
        "Precision": 0,
        "Recall": 0,
        "F1": 0,
        "Mercury Score": 0,
    }
    warn_id_list = list(input_matrix.index)
    event_id_list = list(input_matrix.columns)
    score_matrix = np.array(input_matrix)

    # Nothing can be matched: keep the all-zero defaults.  The emptiness test
    # comes first so np.max is never called on an empty array.
    if min(score_matrix.shape) == 0 or np.max(score_matrix) <= 0:
        return out_dict

    row_count, col_count = score_matrix.shape
    k_max = max(row_count, col_count)

    # The solver needs a square matrix: clip negatives to zero and pad the
    # missing rows/columns with zero scores.
    score_matrix_square = np.zeros((k_max, k_max))
    score_matrix_square[:row_count, :col_count] = np.maximum(score_matrix, 0)

    assignment = dlib.max_cost_assignment(dlib.matrix(score_matrix_square))

    # Drop pairs that fall into the padding.  They carry no label, so looking
    # them up used to raise IndexError when allow_zero_scores was True and the
    # input was not square.  Their score is always 0, so dropping them does
    # not change the result when zero scores are filtered out anyway.
    pairs = [
        (row, int(col))
        for row, col in enumerate(assignment)
        if row < row_count and col < col_count
    ]
    pair_scores = np.array(
        [score_matrix_square[row, col] for row, col in pairs])

    if not allow_zero_scores:
        pairs = [
            pair for pair, score in zip(pairs, pair_scores) if score > 0
        ]

    matches = [(warn_id_list[row], event_id_list[col]) for row, col in pairs]
    out_dict["Matches"] = matches

    # Quality is measured on positive scores only, even when zero-score
    # matches are allowed.
    scores_ = pair_scores[pair_scores > 0]
    out_dict["Quality Score"] = np.mean(scores_)
    out_dict["Details"] = {"Quality Scores": list(scores_)}

    prec = len(matches) / row_count
    rec = len(matches) / col_count
    out_dict["Precision"] = prec
    out_dict["Recall"] = rec
    out_dict["F1"] = Scorer.f1(prec, rec)
    out_dict["Mercury Score"] = (out_dict["Quality Score"] +
                                 out_dict["F1"]) / 2
    return out_dict
# Assignment-problem walkthrough.
#
# Suppose N people have to be placed in N jobs. Every person earns the company
# a different amount of money depending on the job, because skills differ from
# person to person. The goal is the placement that maximizes the money the
# whole group makes, which is what dlib.max_cost_assignment() computes.

# Here there are 3 people and 3 jobs. Rows of the cost matrix are people and
# columns are jobs; each entry is the money that person produces at that job.
# The first row, for instance, says person 0 makes $1 at job 0, $2 at job 1,
# and $6 at job 2.
cost = dlib.matrix([
    [1, 2, 6],
    [5, 3, 6],
    [4, 5, 0],
])

# A single call yields the best person-to-job placement.
assignment = dlib.max_cost_assignment(cost)

# This prints optimal assignments: [2, 0, 1]
# meaning the first-row person goes to job 2, the middle-row person to job 0,
# and the bottom-row person to job 1.
print("Optimal assignments: {}".format(assignment))

# This prints optimal cost: 16.0
# which is correct since the optimal assignment earns 6+5+5.
optimal_cost = dlib.assignment_cost(cost, assignment)
print("Optimal cost: {}".format(optimal_cost))
import dlib

# Let's imagine you need to assign N people to N jobs. Additionally, each person will make
# your company a certain amount of money at each job, but each person has different skills
# so they are better at some jobs and worse at others. You would like to find the best way
# to assign people to these jobs. In particular, you would like to maximize the amount of
# money the group makes as a whole. This is an example of an assignment problem and is
# what is solved by the dlib.max_cost_assignment() routine.

# So in this example, let's imagine we have 3 people and 3 jobs. We represent the amount of
# money each person will produce at each job with a cost matrix. Each row corresponds to a
# person and each column corresponds to a job. So for example, below we are saying that
# person 0 will make $1 at job 0, $2 at job 1, and $6 at job 2.
cost = dlib.matrix([[1, 2, 6],
                    [5, 3, 6],
                    [4, 5, 0]])

# To find out the best assignment of people to jobs we just need to call this function.
assignment = dlib.max_cost_assignment(cost)

# This prints optimal assignments: [2, 0, 1]
# which indicates that we should assign the person from the first row of the cost matrix to
# job 2, the middle row person to job 0, and the bottom row person to job 1.
# A single-argument print(...) call behaves the same on Python 2 and Python 3.
# The two spaces after the colon reproduce what the former Python 2 statement
# `print "optimal assignments: ", assignment` emitted.
print("optimal assignments:  {}".format(assignment))

# This prints optimal cost: 16.0
# which is correct since our optimal assignment is 6+5+5.