def combine_datasets():
    poverty_table, _ = utils.read_table("poverty_data_clean.csv", True)
    crime_table, _ = utils.read_table("crime_data_clean.csv", True)
    rent_table, _ = utils.read_table("rent_data_no_dups.csv", True)
    combined_table = []
    combined_header = ["County", "State",
                       "Pov_Num_All", "Pov_Pct_All", "Median_Income",
                       "Crime_Rate_per_100000", "Murder", "Rape", "Robbery",
                       "Aggravated_Assault", "Burglary", "Larceny",
                       "Vehicle_Theft", "Arson",
                       "Population", "Mean_Rent", "Median_Rent",
                       "Latitude", "Longitude"]
    # Join the three tables on their county-name columns.
    for poverty_row in poverty_table:
        for crime_row in crime_table:
            for rent_row in rent_table:
                if poverty_row[2] == crime_row[0] and crime_row[0] == rent_row[3]:
                    new_add = [poverty_row[2], poverty_row[1],
                               poverty_row[3], poverty_row[4], poverty_row[9],
                               crime_row[2], crime_row[3], crime_row[4],
                               crime_row[5], crime_row[6], crime_row[7],
                               crime_row[8], crime_row[9], crime_row[10],
                               crime_row[11], rent_row[9], rent_row[10],
                               rent_row[7], rent_row[8]]
                    combined_table.append(new_add)
    # Deduplicate on (County, State); the last row seen for a key wins.
    no_dups = dict(((x[0], x[1]), x) for x in combined_table)
    new_table = list(no_dups.values())
    new_table.insert(0, combined_header)
    utils.write_table("combined_data.csv", new_table)

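# The triple nested loop above is an O(n_p * n_c * n_r) join. Below is a
# minimal sketch of a dict-indexed alternative on the same county-name
# keys; it is an illustration, not code from the original module, and it
# keeps only the last crime/rent row per county (the dedup step above does
# the same for the final output).
def combine_datasets_indexed(poverty_table, crime_table, rent_table):
    crime_by_county = {row[0]: row for row in crime_table}
    rent_by_county = {row[3]: row for row in rent_table}
    combined = []
    for p in poverty_table:
        c = crime_by_county.get(p[2])
        r = rent_by_county.get(p[2])
        if c is not None and r is not None:
            combined.append([p[2], p[1], p[3], p[4], p[9],
                             c[2], c[3], c[4], c[5], c[6], c[7], c[8],
                             c[9], c[10], c[11], r[9], r[10], r[7], r[8]])
    return combined
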
def to_neo(namespace):
    ng = NeoGraph()
    if not namespace.dont_truncate:
        ng.truncate()
    symbols = read_table('symbols')
    var_coef = read_table('coef_variation')
    cor = read_table(namespace.corr_id)
    cor = cor.query('cor == cor')  # drop rows where 'cor' is NaN (NaN != NaN)
    symbols = symbols[(symbols['symbol'].isin(cor['symbol1']))
                      | (symbols['symbol'].isin(cor['symbol2']))]
    symbols = symbols.merge(var_coef, on='symbol', how='left')
    ng.add_companies(symbols)
    ng.create_links(cor)

def main():
    tname = 'graded_performance_v3.csv'
    students = u.read_table(tname)
    header = students[0]
    header_one = students[0][:-6]  # header without the trailing score columns
    students = students[1:]
    k = 10  # number of cross-validation folds
    math_table = pass_fail_all_test(students, 7, header)
    reading_table = pass_fail_all_test(students, 8, header)
    writing_table = pass_fail_all_test(students, 9, header)
    m = k_cross(header_one + ['pass_math'], math_table, k)
    r = k_cross(header_one + ['pass_reading'], reading_table, k)
    w = k_cross(header_one + ['pass_writing'], writing_table, k)
    bar = "-----------------------------------------------------------------"
    print("\n\n" + bar)
    print("           Decision Tree Pass/Fail Predictions")
    print(bar)
    print("\n\n                     Math Scores")
    print(bar)
    # TP, TN, FP, FN are assumed module-level indices into the k_cross result.
    c_matrix(m[TP], m[TN], m[FP], m[FN])
    print(bar)
    print("\n\n                     Reading Scores")
    print(bar)
    c_matrix(r[TP], r[TN], r[FP], r[FN])
    print(bar)
    print("\n\n                     Writing Scores")
    print(bar)
    c_matrix(w[TP], w[TN], w[FP], w[FN])

def proc_depth_info(bed_file, depth_file, depth_list):
    depth_df = pd.read_table(depth_file, header=None, prefix='x')
    bed_dt = utils.read_table(bed_file)
    region_df_list = []
    for xx in bed_dt:
        logger.info(' '.join(xx))
        if len(xx) > 3:
            chrom, start, end, region_name = xx[:4]
        elif len(xx) == 3:
            chrom, start, end = xx
        else:
            logger.error('Malformed BED file input; please check it.')
            exit()
        # Positions of this region in the depth table.
        region_df = depth_df[(depth_df.iloc[:, 0] == chrom)
                             & (depth_df.iloc[:, 1] >= int(start))
                             & (depth_df.iloc[:, 1] <= int(end))]
        region_cov_df = get_coverage_depth(region_df, depth_list)
        length = int(end) - int(start) + 1
        region_cov_df = region_cov_df / length  # counts -> coverage fractions
        region_mean_df = region_df.iloc[:, 2:].sum() / length
        region_cov_df.loc['mean_depth', :] = region_mean_df
        region_cov_df.loc['region', :] = '%s:%s-%s' % (chrom, start, end)
        if len(xx) > 3:
            region_cov_df.loc['region_name', :] = region_name
        region_cov_df = region_cov_df.transpose()
        region_df_list.append(region_cov_df)
    return pd.concat(region_df_list)

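# A minimal sketch of the get_coverage_depth helper assumed above (its
# definition is not part of this snippet): for each threshold in depth_list,
# count the positions whose depth meets or exceeds it, per sample column.
# The 'cov_Nx' row labels are illustrative; only the shape (metrics as rows,
# samples as columns) is inferred from how the result is used above.
import pandas as pd

def get_coverage_depth(region_df, depth_list):
    depth_cols = region_df.iloc[:, 2:]  # per-sample depth columns
    rows = {'cov_%sx' % d: (depth_cols >= d).sum() for d in depth_list}
    return pd.DataFrame(rows).transpose()
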
def main():
    tname = 'StudentsPerformance.csv'
    students = u.read_table(tname)
    header = students[0][:-3] + ['AvgScore']
    students = group_scores(students[1:])
    # t_head, t_tab = clean_titanic(tname)
    out = k_cross(header, students, 10)

def do_arm():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    rules = arm.Association_Rule_Mining(data, header, minsup=0.3,
                                        minconf=0.95, columns=[2, 3, 4, 5, 7])
    utils.rules_pretty_print(rules,
                             "Association Rule Mining for Crime Rate Factors")

def test_knn():
    data, _ = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = knn.knn_classifier(data, class_index, predictors, 5, 5)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "KNN Classifier Prediction of Crime Rate")

def test_naive_bayes():
    data, header = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 7]
    results = bayes.naive_bayes_classifier(data, header, 10, class_index,
                                           predictors, [2, 3, 5, 9])
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Naive Bayes Classifier Prediction of Crime Rate")

def get_tileset(tileset_name, index=-1, override_offset=-1):
    offsets = utils.read_table('scripts/res/meta_tileset_load_offsets.tbl')
    base_offset = 0
    if override_offset == -1:
        if index == -1:
            idx_tbl = utils.read_table('scripts/res/meta_tileset_index.tbl')
            hits = [idx for idx in idx_tbl if idx_tbl[idx] == tileset_name]
            if len(hits) != 1:
                # `raise` needs an exception instance, not a bare string.
                raise ValueError(
                    f"Found more or less than one entry for {tileset_name}; "
                    "provide an index if it appears more than once")
            index = hits[0]
        base_offset = (int(offsets[index], 16) // 0x10) & 0xFF
    else:
        base_offset = override_offset
    tbl = utils.read_list(f'scripts/res/tilesets/{tileset_name}.lst',
                          base_offset)
    tbl[0] = ' '
    return tbl

def test_random_forest():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = rforest.random_forest_classifier(data, header, class_index,
                                               predictors, 100, 25, 3)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Random Forest Classifier Prediction of Crime Rate")

def test_decision_tree():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = dtree.decision_tree_classifier(data, header, class_index,
                                             predictors, 30)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Decision Tree Classifier Prediction of Crime Rate")

def prepare_frames(split_name):
    """
    Extract frames for dataset `split_name` and pickle them to disk.

    frames_info[samp_id]
    |- image: image path
    |- trans: word label (without spaces)
    `- frames: frame image filename list

    e.g. frames_info[train_0]:
    {
        'frames': ['0.jpg', '1.jpg', '2.jpg', '3.jpg', '4.jpg', ...],
        'image': '/home/paile/research/mjsynth/mnt/ramdisk/max/90kDICT32px/378/5/54_amethyst_2482.jpg',
        'trans': 'amethyst'
    }
    """
    print('processing:', split_name, '-------------------------')
    # read split info
    split = config.splits[split_name]
    trans_path = split.trans_pth
    frames_dir = split.frames_dir
    if path.isdir(frames_dir):
        glog.warning('%s already exists.', frames_dir)
        sys.exit(-1)
    print('trans_path:', trans_path)
    print('frames_dir:', frames_dir)
    # read transcriptions
    all_trans = utils.read_table(trans_path)
    # assign sample ids
    trans_ids = ['%s_%d' % (split_name, i) for i in range(len(all_trans))]
    all_trans = OrderedDict(zip(trans_ids, all_trans))
    # frames info
    frames_info = OrderedDict()
    err_images = []
    for samp_id in all_trans:
        try:
            image_pth = all_trans[samp_id][0]
            im = load_image(image_pth)
            frame_pths = extract_frames(im, samp_id, frames_dir)
            frames_info[samp_id] = {}
            frames_info[samp_id]['image'] = image_pth
            frames_info[samp_id]['trans'] = all_trans[samp_id][1]
            frames_info[samp_id]['frames'] = frame_pths
        except Exception:
            err_images.append(image_pth + '\n')
            print('Error:', image_pth)

def normalize_combined():
    combined_table, header = utils.read_table("combined_data.csv", True)
    columns = []
    new_header = []
    # Keep only the columns of interest.
    for x in range(len(header)):
        if x not in [2, 6, 7, 8, 9, 10, 11, 12, 13, 16]:
            new_header.append(header[x])
            columns.append(utils.get_column(combined_table, x))
    # Annual rent as a percentage of median income (appended as columns[9]).
    columns.append([
        round(columns[6][i] * 12 * 100 / columns[3][i], 1)
        for i in range(len(columns[0]))
    ])
    new_header.append("Pct_Income_as_Rent")
    columns[2] = normalize_data(columns[2])      # Poverty
    columns[3] = normalize_data(columns[3])      # Median income
    columns[4] = discretize_data(columns[4], 5)  # Crime rate
    columns[5] = normalize_data(columns[5])      # Population
    columns[6] = normalize_data(columns[6])      # Rent
    columns[7] = normalize_data(columns[7])      # Rent as percent of income
    new_table = []
    for x in range(len(columns[0])):
        buffer = []
        for column in columns:
            buffer.append(column[x])
        new_table.append(buffer)
    new_table.insert(0, new_header)
    utils.write_table("combined_data_normalized.csv", new_table)
    columns[2] = discretize_data(columns[2], 3)  # Poverty
    columns[3] = discretize_data(columns[3], 3)  # Median income
    # columns[4] = discretize_data(columns[4], 3)  # Crime rate
    columns[5] = discretize_data(columns[5], 5)  # Population
    columns[6] = discretize_data(columns[6], 3)  # Rent
    columns[7] = discretize_data(columns[7], 5)  # Rent as percent of income
    new_table = []
    for x in range(len(columns[0])):
        buffer = []
        for column in columns:
            buffer.append(column[x])
        new_table.append(buffer)
    new_table.insert(0, new_header)
    utils.write_table("combined_data_discretized.csv", new_table)

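# Minimal sketches of the two helpers used above, assuming min-max scaling
# and equal-width binning; the actual implementations live elsewhere in the
# repo, so treat these as illustrations only.
def normalize_data(values):
    """Scale values to [0, 1] with min-max normalization."""
    lo, hi = min(values), max(values)
    return [(v - lo) / (hi - lo) for v in values]

def discretize_data(values, bins):
    """Map each value to an equal-width bin label in 1..bins."""
    lo, hi = min(values), max(values)
    width = (hi - lo) / bins
    return [min(int((v - lo) / width) + 1, bins) for v in values]
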
def main():
    # import the table, grab the header, and add average scores
    table_name = 'StudentsPerformance.csv'
    students = utils.read_table(table_name)
    header = students.pop(0)
    clean_data(students, header)
    n = 20  # number of decision trees to generate
    m = 15  # number of best trees to keep
    # for each class label, build a forest and classify test instances
    class_labels = [
        "reading score class", "writing score class", "math score class"
    ]
    for label in class_labels:
        classify_using_forest(students, header, n, m, label)

def __init__(self, data_path, c):
    import shutil
    self.data_path = data_path
    self.c = c
    self.output_path = _p.join('result', self.data_path)
    utils.mkdir_p(self.output_path)
    self.cgmdir = _p.join(self.data_path, 'cgm')
    self.spiketime_dir = _p.join(self.data_path, 'spiketime')
    stat_file = 'statid_correlation_mu_sigma2_urate.dat'
    self.single_data = utils.read_table(_p.join(self.data_path, stat_file))
    shutil.copyfile(_p.join(self.data_path, stat_file),
                    _p.join(self.output_path, stat_file))
    # Units with effectively zero firing rate.
    self.zero_firing = [i for i in self.single_data
                        if self.single_data[i]['urate'] < 1e-8]
    self.pairs = utils.read_pairs(self.cgmdir)
    self.pair_data = [{'id': p} for p in self.pairs]

def main(): table = utils.read_table("StudentsPerformance.csv") header = table.pop(0) clean_data(table, header) # make prediction about math score class class_labels = [ "writing score class", "reading score class", "math score class" ] attributes = [ 'gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course' ] for i, label in enumerate(["Writing", "Reading", "Math"]): k = 100 subsampling_accuracy(table, header, k, class_labels[i], attributes, label)
def __init__(self, mode, preprocess=DeepLoc.default_preprocessing):
    self.data = []
    self.preprocess = preprocess
    if mode not in ("train", "validation", "test"):
        raise ValueError(
            "Only 'train', 'validation' and 'test' modes are available.")
    mode_path = os.path.join("DeepLocAugmented", mode)
    poses_path = os.path.join(mode_path, "poses.txt")
    for image_path, x, y, z, qw, qx, qy, qz in read_table(
            poses_path,
            types=(str, float, float, float, float, float, float, float),
            delimiter="\t"):
        pose = (x, y, z, qw, qx, qy, qz)
        self.data.append((image_path, pose))
    self.size = len(self.data)

def __init__(self, mode, preprocess=default_preprocessing):
    self.data = []
    self.preprocess = preprocess
    if mode not in ("train", "test"):
        raise ValueError("Only 'train' and 'test' modes are available.")
    mode_path = os.path.join("DeepLoc", mode)
    poses_path = os.path.join(mode_path, "poses.txt")
    for filename, x, y, z, qw, qx, qy, qz in read_table(
            poses_path,
            types=(str, float, float, float, float, float, float, float),
            delimiter=" "):
        pose = (x, y, z, qw, qx, qy, qz)
        image_path = os.path.join(mode_path, "LeftImages",
                                  "{}.png".format(filename))
        self.data.append((image_path, pose))
    self.size = len(self.data)

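# A minimal sketch of the read_table helper used by both loaders above,
# assuming it yields one typed tuple per delimited line; the real helper
# lives in the project's utils module, so treat this as an illustration.
def read_table(path, types, delimiter):
    with open(path) as f:
        for line in f:
            fields = line.rstrip("\n").split(delimiter)
            yield tuple(t(v) for t, v in zip(types, fields))
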
def data_vis():
    data, header = utils.read_table("combined_data.csv", True)
    x_data, y_data = utils.get_column(data, 3), utils.get_column(data, 5)
    dv.scatter_plot(x_data, y_data, "Poverty Levels v. Crime Rate",
                    "Poverty Levels (%)", "Crime Rate per 100,000 people", 10,
                    "Poverty_v_Crime_graphed.png", 100, True)

def stats():
    data, header = utils.read_table("combined_data.csv", True)
    # Print the row(s) with the smallest nonzero value in column 6;
    # the minimum is computed once instead of on every loop iteration.
    min_val = min(x for x in utils.get_column(data, 6) if x != 0)
    for row in data:
        if row[6] == min_val:
            print(row)

def main(): table = utils.read_table("auto-data-clean.txt") for row in table: del row[-2] utils.write_table("auto-data-no-names.txt", table)
def __init__(self, set_path, mode, preprocess=default_preprocessing,
             augment=True, only_front_camera=False, split="manual",
             return_image_paths=False):
    self.data = []
    self.preprocess = preprocess
    self.augment = augment
    self.only_front_camera = only_front_camera
    self.return_image_paths = return_image_paths
    print("Data:", self.data)
    print("Augment:", self.augment)
    print("OFC:", self.only_front_camera)
    print("RIP:", self.return_image_paths)
    print("---------------------")
    if mode not in ("train", "validation", "test", "visualize"):
        raise ValueError("Invalid mode.")
    self.mode = mode
    origin_path = os.path.join(set_path, "origin.txt")
    for line in lines(origin_path):
        self.origin = torch.Tensor(tuple(map(float, line.split(" "))))
    if mode == "visualize":
        mode_path = os.path.join(set_path, "front", "center")
        poses_path = os.path.join(mode_path, "poses.txt")
        for filename, x, y, qw, qx, qy, qz in read_table(
                poses_path,
                types=(str, float, float, float, float, float, float),
                delimiter=" "):
            _, _, theta = euler_from_quaternion((qw, qx, qy, qz))
            assert -np.pi <= theta <= np.pi
            # Normalization: make x and y independent of the origin.
            x, y, _ = torch.Tensor([x, y, 0]) + self.origin
            x, y, theta = PerceptionCarDataset.normalize(x, y, theta)
            pose = (x, y, theta)
            image_path = os.path.join(mode_path, filename)
            self.data.append((image_path, pose))
    else:
        poses_path = os.path.join(set_path, "{}.{}.txt".format(mode, split))
        for *filenames, x, y, qw, qx, qy, qz in read_table(
                poses_path,
                types=(str, str, str, str, str, str, float, float, float,
                       float, float, float),
                delimiter=" "):
            _, _, theta = euler_from_quaternion((qw, qx, qy, qz))
            assert -np.pi <= theta <= np.pi
            # Normalization: make x and y independent of the origin.
            x, y, _ = torch.Tensor([x, y, 0]) + self.origin
            x, y, theta = PerceptionCarDataset.normalize(x, y, theta)
            pose = (x, y, np.cos(theta), np.sin(theta))
            image_paths = map(lambda fn: os.path.join(set_path, fn),
                              filenames)
            if self.only_front_camera:
                self.data.append((next(image_paths), pose))
            else:
                self.data.append((*image_paths, pose))
    self.size = len(self.data)

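# A minimal sketch of extracting yaw from a quaternion in the (w, x, y, z)
# ordering passed to euler_from_quaternion above. The real function comes
# from an external transforms library; this illustrative helper only shows
# the standard yaw formula, whose atan2 result stays within [-pi, pi] as
# the asserts above expect.
import numpy as np

def yaw_from_quaternion(qw, qx, qy, qz):
    return np.arctan2(2.0 * (qw * qz + qx * qy),
                      1.0 - 2.0 * (qy * qy + qz * qz))
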
import sys  # needed for sys.argv below; missing from the original imports
import random
import time

import model
import memory_based
import model_based_nmf
import model_based_svd
import metrics
import split_test
import utils

if __name__ == '__main__':
    input_arguments = {
        "Historic Data": utils.read_table(sys.argv[1], ':', ','),
        "Prediction Data": utils.read_table(sys.argv[2], ':')
    }
    output_file = sys.argv[3]
    start = time.time()
    test = True
    # input_arguments['Historic Data'] = random.sample(
    #     input_arguments['Historic Data'],
    #     int(len(input_arguments['Historic Data']) * 0.1))
    if test:
        for test in range(0, 10):

elif args.policy_name == "OurDDPG": policy = OurDDPG.DDPG(state_dim, action_dim, max_action) elif args.policy_name == "DDPG": policy = DDPG.DDPG(state_dim, action_dim, max_action) policy.load("%s" % (file_name), directory=model_dir) if args.save_video: fourcc = cv2.VideoWriter_fourcc(*'mp4v') video_name = video_dir + '/{}_TD3_{}.mp4'.format( datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), args.env_name) out_video = cv2.VideoWriter(video_name, fourcc, 60.0, (640, 480)) print(video_name) human_joint_angle = utils.read_table() for i in range(1): obs = env.reset() done = False pre_foot_contact = 1 foot_contact = 1 foot_contact_vec = np.asarray([1, 1, 1]) gait_num = 0 joing_angle_list = [] coe_list = [] joint_angle = np.zeros((0, 6)) while not done: action = policy.select_action(np.array(obs)) obs, reward, done, _ = env.step(action) utils.fifo_list(foot_contact_vec, obs[-2]) if 0 == np.std(foot_contact_vec):