def segment_driver(driver_id):
    """Generate the segment data in settings.SEGMENTS_FOLDER[1] for one driver."""
    da = DataAccess()
    for ride_id, ride in enumerate(da.get_rides(driver_id), 1):
        if da.skip_segment(driver_id, ride_id):
            continue
        # smooth the trace, tag each point with its sample index as a
        # timestamp, then simplify with Ramer-Douglas-Peucker (epsilon=10)
        stamped = [point + [t] for t, point in enumerate(smoothen(ride))]
        simplified = rdp(stamped, epsilon=10)
        # per-segment features from consecutive pairs / triples of points
        lengths = [util.euclidian_distance(a, b)
                   for a, b in zip(simplified, simplified[1:])]
        times = [b[2] - a[2] for a, b in zip(simplified, simplified[1:])]
        angles = [util.get_angle(a, b, c)
                  for a, b, c in zip(simplified, simplified[1:], simplified[2:])]
        # histogram the (log-)values into fixed buckets
        lengths = util.bucket(np.log(lengths), 25, [2.2, 8])
        times = util.bucket(np.log(times), 20, [1, 5.5])
        angles = util.bucket(angles, 30, [0, 180])
        da.write_ride_segments(driver_id, ride_id, lengths, times, angles)
    logging.info('finished segmenting driver %s' % driver_id)
def segment_driver(driver_id):
    """Segment every ride of *driver_id* and persist the bucketed features.

    Produces the segment data stored under settings.SEGMENTS_FOLDER[1].
    """
    da = DataAccess()
    for idx, raw_ride in enumerate(da.get_rides(driver_id)):
        ride_id = idx + 1
        if da.skip_segment(driver_id, ride_id):
            continue
        # attach the sample index as a timestamp, then simplify the
        # smoothed trace with Ramer-Douglas-Peucker (epsilon = 10)
        pts = rdp([p + [i] for i, p in enumerate(smoothen(raw_ride))], epsilon=10)
        n = len(pts)
        lengths = [util.euclidian_distance(pts[i - 1], pts[i]) for i in xrange(1, n)]
        times = [pts[i][2] - pts[i - 1][2] for i in xrange(1, n)]
        angles = [util.get_angle(pts[i - 2], pts[i - 1], pts[i]) for i in xrange(2, n)]
        # histogram the log-lengths, log-times and raw angles into fixed buckets
        da.write_ride_segments(
            driver_id,
            ride_id,
            util.bucket(np.log(lengths), 25, [2.2, 8]),
            util.bucket(np.log(times), 20, [1, 5.5]),
            util.bucket(angles, 30, [0, 180]),
        )
    logging.info('finished segmenting driver %s' % driver_id)
def run_model():
    """Train and evaluate a LinUCB bandit over NUM_BATCHES shuffled replays
    of the (tiled) linear dataset.

    Returns a list of per-batch tuples:
    (batch_id, total regret, error while training, eval error, precision, recall)

    Fix: removed the unreachable `if False:` branch (and the dead
    commented-out model-selection lines it guarded) — it could never run
    and only obscured which data/model the function actually uses.
    """
    data, true_labels = ldl.get_data_linear()
    true_buckets = [util.bucket(t) for t in true_labels]
    # replicate the dataset so every batch trains on DATA_MULTIPLIER copies
    data = np.tile(data, (DATA_MULTIPLIER, 1))
    print("DATA SHAPE:", data.shape)
    true_buckets = np.tile(true_buckets, DATA_MULTIPLIER)
    batch_results = []
    for _ in range(NUM_BATCHES):
        model = Lin_UCB(ALPHA)
        batch_id = str(random.randint(100000, 999999))
        print()
        print("Start Batch: ", batch_id)
        # shuffle features and labels together so they stay aligned
        zipped_data = list(zip(data, true_buckets))
        random.shuffle(zipped_data)
        data, true_buckets = zip(*zipped_data)
        data = np.array(data)
        model.train(data, true_buckets)
        pred_buckets = model.evaluate(data)
        print(batch_id, "Performance on " + str(model))
        acc, precision, recall = util.evaluate_performance(
            pred_buckets, true_buckets)
        print("\tAccuracy:", acc)
        print("\tPrecision:", precision)
        print("\tRecall:", recall)
        plot_regret(model.regret, ALPHA, batch_id)
        plot_error_rate(model.error_rate, ALPHA, batch_id)
        batch_results.append(
            (batch_id, model.get_regret()[-1], model.get_error_rate()[-1],
             1 - acc, precision, recall))
        # persist the raw regret / error curves for later analysis
        with open('batch/regret' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.regret, fp)
        with open('batch/error' + str(model) + batch_id, 'wb') as fp:
            pickle.dump(model.error_rate, fp)
    return batch_results
def evaluate_datum(self, datum):
    """Predict a warfarin dose bucket for one patient with the
    pharmacogenetic dosing formula (coefficients from appx.pdf).

    The linear model predicts sqrt(weekly dose), hence the final squaring
    before bucketing.

    BUG FIX: the six CYP2C9 terms were written without parentheses, e.g.
        dose -= 0.5211 * datum[...] == values[...]
    which Python parses as `(0.5211 * datum[...]) == values[...]` — a
    boolean — so dose was decremented by 0/1 instead of by
    coefficient * indicator. The comparisons are now parenthesized,
    matching the correctly-written VKORC1 and Race terms.
    """
    dose = 5.6044
    dose -= 0.2614 * datum[self.columns_dict['Age']]
    dose += 0.0087 * datum[self.columns_dict['Height (cm)']]
    dose += 0.0128 * datum[self.columns_dict['Weight (kg)']]
    vk_gene = 'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'
    vk_gene2 = 'VKORC1 QC genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'
    # either of the two VKORC1 columns may carry the genotype call
    dose -= 0.8677 * (datum[self.columns_dict[vk_gene]] == self.values_dict[vk_gene]['A/G']
                      or datum[self.columns_dict[vk_gene2]] == self.values_dict[vk_gene2]['A/G'])
    dose -= 1.6974 * (datum[self.columns_dict[vk_gene]] == self.values_dict[vk_gene]['A/A']
                      or datum[self.columns_dict[vk_gene2]] == self.values_dict[vk_gene2]['A/A'])
    dose -= 0.4854 * (datum[self.columns_dict[vk_gene]] == self.values_dict[vk_gene]['NA']
                      and datum[self.columns_dict[vk_gene2]] == self.values_dict[vk_gene2]['NA'])
    # CYP2C9 genotype indicators (lookups hoisted; comparisons parenthesized)
    cyp = datum[self.columns_dict['CYP2C9 consensus']]
    cyp_vals = self.values_dict['CYP2C9 consensus']
    dose -= 0.5211 * (cyp == cyp_vals['*1/*2'])
    dose -= 0.9357 * (cyp == cyp_vals['*1/*3'])
    dose -= 1.0616 * (cyp == cyp_vals['*2/*2'])
    dose -= 1.9206 * (cyp == cyp_vals['*2/*3'])
    dose -= 2.3312 * (cyp == cyp_vals['*3/*3'])
    dose -= 0.2188 * (cyp == cyp_vals['NA'])
    dose -= 0.1092 * (datum[self.columns_dict['Race']] ==
                      self.values_dict['Race']['Asian'])
    dose -= 0.2760 * (datum[self.columns_dict['Race']] ==
                      self.values_dict['Race']['Black or African American'])
    dose -= 0.1032 * (datum[self.columns_dict['Race']] ==
                      self.values_dict['Race']['NA'])
    dose += 1.1816 * self._get_enzyme_inducer_status(datum)  # Enzyme inducer status
    # NOTE(review): unlike the clinical model, this multiplies by the raw
    # encoded column value rather than comparing to values_dict[...]['1'] —
    # confirm against the encoding of 'Amiodarone (Cordarone)'.
    dose -= 0.5503 * datum[self.columns_dict['Amiodarone (Cordarone)']]
    # dose calculated in appx.pdf states that it's the sqrt of weekly dose
    return util.bucket(dose**2)
def evaluate_datum(self, datum):
    """Warfarin clinical dosing model (coefficients from appx.pdf).

    Predicts sqrt(weekly dose) from demographics only, then returns the
    bucketed squared value.
    """
    cols = self.columns_dict
    vals = self.values_dict
    race = datum[cols['Race']]
    race_vals = vals['Race']
    dose = 4.0376
    dose -= 0.2546 * datum[cols['Age']]
    dose += 0.0118 * datum[cols['Height (cm)']]
    dose += 0.0134 * datum[cols['Weight (kg)']]
    # race indicators: exactly one comparison is true per patient row
    dose -= 0.6752 * (race == race_vals['Asian'])
    dose += 0.4060 * (race == race_vals['Black or African American'])
    dose += 0.0443 * (race == race_vals['NA'])
    dose += 0.0443 * (race == race_vals['Unknown'])
    dose += 1.2799 * self._get_enzyme_inducer_status(datum)
    amiodarone = 'Amiodarone (Cordarone)'
    dose -= 0.5695 * (datum[cols[amiodarone]] == vals[amiodarone]['1'])
    # dose calculated in appx.pdf states that it's the sqrt of weekly dose
    return util.bucket(dose**2)
        # NOTE(review): tail of an enclosing evaluate() method whose `def`
        # line lies outside this chunk — `data` and `labels` are set up
        # above this fragment; do not edit in isolation.
        for i in range(len(data)):
            # pick the best arm for each datum independently
            labels[i] = self._evaluate_datum(data[i])
        return labels

    def _evaluate_datum(self, features):
        """Return the index of the arm with the highest predicted reward
        for *features*, scanning all self.K arms greedily."""
        action_t = -1
        best_reward = float('-inf')
        for arm in range(self.K):
            predicted_arm_reward = self._predict_reward(arm, features)
            if predicted_arm_reward > best_reward:
                action_t = arm
                best_reward = predicted_arm_reward
        # -inf sentinel guarantees some arm wins whenever self.K > 0
        assert action_t != -1, "[eval datum] No arm was selected..."
        return action_t


# probably do not run this for decent results (below data isn't randomized)
# execute 'run_batches.py' instead on the lasso bandit model.
if __name__ == '__main__':
    # smoke-run: train and score the LASSO bandit on the un-shuffled data
    data, true_labels = ldl.get_data_linear()
    true_buckets = [util.bucket(t) for t in true_labels]
    lasso_bandit = LASSO_BANDIT()
    lasso_bandit.train(data, true_buckets)
    pred_buckets = lasso_bandit.evaluate(data)
    acc = util.get_accuracy_bucketed(pred_buckets, true_buckets)
    print("accuracy on LASSO bandit: " + str(acc))
    #plot_regret(lasso_bandit.regret, ALPHA)
    #plot_error_rate(lasso_bandit.error_rate, ALPHA)
def evaluate_datum(self, datum):
    """Fixed-dose baseline: ignore patient features and always return the
    bucket of a constant 35 mg weekly dose."""
    fixed_weekly_dose = 35
    return util.bucket(fixed_weekly_dose)