def combine_datasets():

    poverty_table, _ = utils.read_table("poverty_data_clean.csv", True)
    crime_table, _ = utils.read_table("crime_data_clean.csv", True)
    rent_table, _ = utils.read_table("rent_data_no_dups.csv", True)
    combined_table = []
    combined_header = [
        "County", "State",
        "Pov_Num_All", "Pov_Pct_All", "Median_Income",
        "Crime_Rate_per_100000", "Murder", "Rape", "Robbery",
        "Aggravated_Assault", "Burglary", "Larceny", "Vehicle_Theft", "Arson",
        "Population", "Mean_Rent", "Median_Rent", "Latitude", "Longitude"
    ]

    for poverty_row in poverty_table:
        for crime_row in crime_table:
            for rent_row in rent_table:
                if poverty_row[2] == crime_row[0] and crime_row[0] == rent_row[3]:
                    new_add = [
                        poverty_row[2], poverty_row[1],
                        poverty_row[3], poverty_row[4], poverty_row[9],
                        crime_row[2], crime_row[3], crime_row[4], crime_row[5],
                        crime_row[6], crime_row[7], crime_row[8], crime_row[9],
                        crime_row[10], crime_row[11],
                        rent_row[9], rent_row[10], rent_row[7], rent_row[8]
                    ]
                    combined_table.append(new_add)

    # Deduplicate on the (County, State) key; the dict keeps the last
    # occurrence of each pair.
    no_dups = dict(((x[0], x[1]), x) for x in combined_table)
    new_table = list(no_dups.values())

    new_table.insert(0, combined_header)
    utils.write_table("combined_data.csv", new_table)
Example #2
def to_neo(namespace):
    ng = NeoGraph()
    if not namespace.dont_truncate:
        ng.truncate()
    symbols = read_table('symbols')
    var_coef = read_table('coef_variation')
    cor = read_table(namespace.corr_id)
    cor = cor.query('cor == cor')
    symbols = symbols[(symbols['symbol'].isin(cor['symbol1'])) |
                      (symbols['symbol'].isin(cor['symbol2']))]
    symbols = symbols.merge(var_coef, on='symbol', how='left')
    ng.add_companies(symbols)
    ng.create_links(cor)
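# Note on `cor.query('cor == cor')` above: NaN != NaN in pandas, so the query
# keeps only rows whose 'cor' column is non-null. An equivalent, more
# explicit spelling would be:
#
#     cor = cor.dropna(subset=['cor'])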
def main():
    tname = 'graded_performance_v3.csv'
    students = u.read_table(tname)
    header = students[0]
    header_one = students[0][:-6]

    students = students[1:]
    k = 10

    math_table = pass_fail_all_test(students, 7, header)
    reading_table = pass_fail_all_test(students, 8, header)
    writing_table = pass_fail_all_test(students, 9, header)

    m = k_cross(header_one + ['pass_math'], math_table, k)
    r = k_cross(header_one + ['pass_reading'], reading_table, k)
    w = k_cross(header_one + ['pass_writing'], writing_table, k)

    print(
        "\n\n-----------------------------------------------------------------"
    )
    print("              Decision Tree Pass/Fail Predictions")
    print("-----------------------------------------------------------------")
    print("\n\n                       Math Scores")
    print("-----------------------------------------------------------------")
    c_matrix(m[TP], m[TN], m[FP], m[FN])
    print("-----------------------------------------------------------------")
    print("\n\n                      Reading Scores")
    print("-----------------------------------------------------------------")
    c_matrix(r[TP], r[TN], r[FP], r[FN])
    print("-----------------------------------------------------------------")
    print("\n\n                      Writing Scores")
    print("-----------------------------------------------------------------")
    c_matrix(w[TP], w[TN], w[FP], w[FN])
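# c_matrix and the TP/TN/FP/FN indices are defined elsewhere in this module.
# A minimal sketch of a compatible confusion-matrix printer, assuming k_cross
# returns counts indexed by TP, TN, FP, FN (the name c_matrix_sketch is
# hypothetical):
#
#     def c_matrix_sketch(tp, tn, fp, fn):
#         total = tp + tn + fp + fn
#         print("              Predicted Pass   Predicted Fail")
#         print("Actual Pass   %14d   %14d" % (tp, fn))
#         print("Actual Fail   %14d   %14d" % (fp, tn))
#         print("Accuracy: %.3f" % ((tp + tn) / total))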
Example #4
def proc_depth_info(bed_file, depth_file, depth_list):
    depth_df = pd.read_table(depth_file, header=None, prefix='x')
    bed_dt = utils.read_table(bed_file)
    region_df_list = []
    for xx in bed_dt:
        logger.info(' '.join(xx))
        if len(xx) > 3:
            chrom, start, end, region_name = xx[:4]
        elif len(xx) == 3:
            chrom, start, end = xx
        else:
            logger.error('Malformed BED file input, please verify')
            exit()
        region_df = depth_df[(depth_df.iloc[:, 0] == chrom)
                             & (depth_df.iloc[:, 1] >= int(start)) &
                             (depth_df.iloc[:, 1] <= int(end))]
        region_cov_df = get_coverage_depth(region_df, depth_list)
        length = int(end) - int(start) + 1
        region_cov_df = region_cov_df / length
        region_mean_df = region_df.iloc[:, 2:].sum() / length

        region_cov_df.loc['mean_depth', :] = region_mean_df

        region_cov_df.loc['region', :] = '%s:%s-%s' % (chrom, start, end)

        if len(xx) > 3:
            region_cov_df.loc['region_name', :] = region_name
        region_cov_df = region_cov_df.transpose()
        region_df_list.append(region_cov_df)
    return pd.concat(region_df_list)
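# A hedged usage sketch for proc_depth_info, assuming a samtools-depth-style
# depth file (chrom, pos, per-sample depths) and a BED file of regions; the
# filenames and depth thresholds here are hypothetical:
#
#     cov_df = proc_depth_info('target.bed', 'sample.depth', [1, 10, 30])
#     cov_df.to_csv('coverage_summary.csv')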
def main():
    tname = 'StudentsPerformance.csv'
    students = u.read_table(tname)
    header = students[0][:-3] + ['AvgScore']
    students = group_scores(students[1:])
    # t_head, t_tab = clean_titanic(tname)
    out = k_cross(header, students, 10)
def do_arm():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    rules = arm.Association_Rule_Mining(data,
                                        header,
                                        minsup=.3,
                                        minconf=.95,
                                        columns=[2, 3, 4, 5, 7])
    utils.rules_pretty_print(rules,
                             "Association Rule Mining for Crime Rate Factors")
def test_knn():
    data, _ = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = knn.knn_classifier(data, class_index, predictors, 5, 5)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "KNN Classifier Prediction of Crime Rate")
def test_naive_bayes():
    data, header = utils.read_table("combined_data_normalized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 7]
    results = bayes.naive_bayes_classifier(data, header, 10, class_index,
                                           predictors, [2, 3, 5, 9])
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(results, "Crime Rate?",
                           "Naive Bayes Classifier Prediction of Crime Rate")
Example #9
def get_tileset(tileset_name, index=-1, override_offset=-1):
    offsets = utils.read_table('scripts/res/meta_tileset_load_offsets.tbl')
    base_offset = 0

    if override_offset == -1:
        if index == -1:
            idx_tbl = utils.read_table('scripts/res/meta_tileset_index.tbl')
            hits = [idx for idx in idx_tbl if idx_tbl[idx] == tileset_name]
            if len(hits) != 1:
                raise ValueError(
                    f"Expected exactly one index entry for {tileset_name}; "
                    "provide an explicit index if it appears more than once")
            index = hits[0]

        base_offset = (int(offsets[index], 16) // 0x10) & 0xFF
    else:
        base_offset = override_offset

    tbl = utils.read_list(f'scripts/res/tilesets/{tileset_name}.lst',
                          base_offset)
    tbl[0] = ' '
    return tbl
def test_random_forest():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = rforest.random_forest_classifier(data, header, class_index,
                                               predictors, 100, 25, 3)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Random Forest Classifier Prediction of Crime Rate")
def test_decision_tree():
    data, header = utils.read_table("combined_data_discretized.csv", True)
    class_index = 4
    predictors = [2, 3, 5, 9]
    results = dtree.decision_tree_classifier(data, header, class_index,
                                             predictors, 30)
    accuracy = utils.compute_accuracy(results)
    print(accuracy)
    utils.confusion_matrix(
        results, "Crime Rate?",
        "Decision Tree Classifier Prediction of Crime Rate")
Example #12
def prepare_frames(split_name):
    """
    extract fraems for dataset `split_name`.
    pickle to disk.

    frames_info[samp_id]
                |- image: image path
                |- trans: word label (without space)
                `- frames: frame images filename list
    e.g. 
        frames_info[train_0]: {
            'frames': ['0.jpg', '1.jpg', '2.jpg', '3.jpg', '4.jpg', ...], 
            'image': '/home/paile/research/mjsynth/mnt/ramdisk/max/90kDICT32px/378/5/54_amethyst_2482.jpg', 
            'trans': 'amethyst'
        }
    """
    print('processing:', split_name, '-------------------------')
    # read split info
    split = config.splits[split_name]
    trans_path = split.trans_pth
    frames_dir = split.frames_dir
    if path.isdir(frames_dir):
        glog.warning('%s already exists.', frames_dir)
        sys.exit(-1)
    print('trans_path:', trans_path)
    print('frames_dir:', frames_dir)

    # read trans
    all_trans = utils.read_table(trans_path)

    # assign sample id
    trans_ids = ['%s_%d' % (split_name, i) for i in range(len(all_trans))]
    all_trans = OrderedDict(zip(trans_ids, all_trans))

    # frames info
    frames_info = OrderedDict()
    err_images = []
    for samp_id in all_trans:
        try:
            image_pth = all_trans[samp_id][0]
            im = load_image(image_pth)
            frame_pths = extract_frames(im, samp_id, frames_dir)
            frames_info[samp_id] = {}
            frames_info[samp_id]['image']  = image_pth
            frames_info[samp_id]['trans']  = all_trans[samp_id][1]
            frames_info[samp_id]['frames'] = frame_pths
        except Exception:
            err_images.append(image_pth + '\n')
            print('Error:', image_pth)
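# The snippet above is truncated: per the docstring, frames_info is then
# pickled to disk. A minimal sketch of that step (the output filename is
# hypothetical):
#
#     import pickle
#     with open(path.join(frames_dir, 'frames_info.pkl'), 'wb') as f:
#         pickle.dump(frames_info, f)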
def normalize_combined():
    combined_table, header = utils.read_table("combined_data.csv", True)
    columns = []
    new_header = []
    for x in range(len(header)):
        if x not in [2, 6, 7, 8, 9, 10, 11, 12, 13, 16]:
            new_header.append(header[x])
            columns.append(utils.get_column(combined_table, x))
    # Annualized mean rent as a percentage of median income:
    # columns[6] is Mean_Rent (monthly) and columns[3] is Median_Income.
    columns.append([
        round(columns[6][i] * 12 * 100 / columns[3][i], 1)
        for i in range(len(columns[0]))
    ])
    new_header.append("Pct_Income_as_Rent")

    columns[2] = normalize_data(columns[2])  # Poverty
    columns[3] = normalize_data(columns[3])  # Median Income
    columns[4] = discretize_data(columns[4], 5)  # Crime Rate
    columns[5] = normalize_data(columns[5])  # Population
    columns[6] = normalize_data(columns[6])  # Rent
    columns[7] = normalize_data(columns[7])  # Rent as percent of income.

    new_table = []
    for x in range(len(columns[0])):
        buffer = []
        for column in columns:
            buffer.append(column[x])
        new_table.append(buffer)

    new_table.insert(0, new_header)
    utils.write_table("combined_data_normalized.csv", new_table)

    columns[2] = discretize_data(columns[2], 3)  # Poverty
    columns[3] = discretize_data(columns[3], 3)  # Median Income
    #columns[4] = discretize_data(columns[4], 3) # Crime Rate
    columns[5] = discretize_data(columns[5], 5)  # Population
    columns[6] = discretize_data(columns[6], 3)  # Rent
    columns[7] = discretize_data(columns[7], 5)  # Rent as percent of income.

    new_table = []
    for x in range(len(columns[0])):
        buffer = []
        for column in columns:
            buffer.append(column[x])
        new_table.append(buffer)

    new_table.insert(0, new_header)
    utils.write_table("combined_data_discretized.csv", new_table)
Example #14
def main():
    # import table, get header, add average scores
    table_name = 'StudentsPerformance.csv'
    students = utils.read_table(table_name)
    header = students.pop(0)
    clean_data(students, header)

    # initialize variables
    n = 20  # number of decision trees to generate
    m = 15  # number of best trees to save

    # for each class label, create forest and classify test instances to get accuracy
    class_labels = [
        "reading score class", "writing score class", "math score class"
    ]
    for label in class_labels:
        classify_using_forest(students, header, n, m, label)
Example #15
    def __init__(self, data_path, c):
        import shutil

        self.data_path = data_path
        self.c = c
        self.output_path = _p.join('result', self.data_path)
        utils.mkdir_p(self.output_path)
        self.cgmdir = _p.join(self.data_path, 'cgm')
        self.spiketime_dir = _p.join(self.data_path, 'spiketime')

        self.single_data = utils.read_table(
            _p.join(self.data_path, 'statid_correlation_mu_sigma2_urate.dat'))
        shutil.copyfile(
            _p.join(self.data_path, 'statid_correlation_mu_sigma2_urate.dat'),
            _p.join(self.output_path, 'statid_correlation_mu_sigma2_urate.dat'))

        self.zero_firing = [
            i for i in self.single_data if self.single_data[i]['urate'] < 1e-8
        ]
        self.pairs = utils.read_pairs(self.cgmdir)
        self.pair_data = [{'id': p} for p in self.pairs]
Example #16
def main():
    table = utils.read_table("StudentsPerformance.csv")
    header = table.pop(0)

    clean_data(table, header)

    # make prediction about math score class
    class_labels = [
        "writing score class", "reading score class", "math score class"
    ]
    attributes = [
        'gender', 'race/ethnicity', 'parental level of education', 'lunch',
        'test preparation course'
    ]

    for i, label in enumerate(["Writing", "Reading", "Math"]):
        k = 100
        subsampling_accuracy(table, header, k, class_labels[i], attributes,
                             label)
Example #17
    def __init__(self, mode, preprocess=DeepLoc.default_preprocessing):
        self.data = []
        self.preprocess = preprocess

        if mode not in ("train", "validation", "test"):
            raise ValueError(
                "Only 'train', 'validation' and 'test' modes are available.")

        mode_path = os.path.join("DeepLocAugmented", mode)
        poses_path = os.path.join(mode_path, "poses.txt")

        for image_path, x, y, z, qw, qx, qy, qz in read_table(
                poses_path,
                types=(str, float, float, float, float, float, float, float),
                delimiter="\t"):
            pose = (x, y, z, qw, qx, qy, qz)
            self.data.append((image_path, pose))

        self.size = len(self.data)
Example #18
    def __init__(self, mode, preprocess=default_preprocessing):
        self.data = []
        self.preprocess = preprocess

        if mode not in ("train", "test"):
            raise ValueError("Only 'train' and 'test' modes are available.")

        mode_path = os.path.join("DeepLoc", mode)
        poses_path = os.path.join(mode_path, "poses.txt")

        for filename, x, y, z, qw, qx, qy, qz in read_table(
                poses_path,
                types=(str, float, float, float, float, float, float, float),
                delimiter=" "):
            pose = (x, y, z, qw, qx, qy, qz)
            image_path = os.path.join(mode_path, "LeftImages",
                                      "{}.png".format(filename))
            self.data.append((image_path, pose))

        self.size = len(self.data)
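# read_table in these dataset classes is a project helper that yields one
# typed tuple per line. A minimal sketch consistent with how it is called
# above (the name read_table_sketch is hypothetical):
def read_table_sketch(path, types, delimiter):
    with open(path) as f:
        for line in f:
            fields = line.strip().split(delimiter)
            # coerce each field with its corresponding type constructor
            yield tuple(t(v) for t, v in zip(types, fields))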
def data_vis():
    data, header = utils.read_table("combined_data.csv", True)
    x_data, y_data = utils.get_column(data, 3), utils.get_column(data, 5)
    dv.scatter_plot(x_data, y_data, "Poverty Levels v. Crime Rate",
                    "Poverty Levels (%)", "Crime Rate per 100,000 people", 10,
                    "Poverty_v_Crime_graphed.png", 100, True)
def stats():
    data, header = utils.read_table("combined_data.csv", True)
    # Compute the smallest non-zero value of column 6 once, rather than on
    # every loop iteration.
    min_val = min(x for x in utils.get_column(data, 6) if x != 0)
    for row in data:
        if row[6] == min_val:
            print(row)
Example #21
def main():
    table = utils.read_table("auto-data-clean.txt")
    for row in table:
        del row[-2]

    utils.write_table("auto-data-no-names.txt", table)
Example #22
    def __init__(self,
                 set_path,
                 mode,
                 preprocess=default_preprocessing,
                 augment=True,
                 only_front_camera=False,
                 split="manual",
                 return_image_paths=False):
        self.data = []
        self.preprocess = preprocess
        self.augment = augment
        self.only_front_camera = only_front_camera
        self.return_image_paths = return_image_paths

        print("Data:", self.data)
        print("Augment:", self.augment)
        print("OFC:", self.only_front_camera)
        print("RIP:", self.return_image_paths)
        print("---------------------")

        if mode not in ("train", "validation", "test", "visualize"):
            raise ValueError("Invalid mode.")

        self.mode = mode

        origin_path = os.path.join(set_path, "origin.txt")
        for line in lines(origin_path):
            self.origin = torch.Tensor(tuple(map(float, line.split(" "))))

        # camera_paths = [
        #     os.path.join("front", "center"),
        #     os.path.join("front", "left"),
        #     os.path.join("front", "right"),
        #     os.path.join("back", "center"),
        #     os.path.join("back", "left"),
        #     os.path.join("back", "right")
        # ]
        #
        # for camera_path in camera_paths:
        #     poses_path = os.path.join(mode_path, camera_path, "poses.txt")

        if mode == "visualize":
            mode_path = os.path.join(set_path, "front", "center")
            poses_path = os.path.join(mode_path, "poses.txt")
            for filename, x, y, qw, qx, qy, qz in read_table(
                    poses_path,
                    types=(str, float, float, float, float, float, float),
                    delimiter=" "):
                _, _, theta = euler_from_quaternion((qw, qx, qy, qz))
                assert -np.pi <= theta <= np.pi
                # Normalization: this makes x and y independent of the origin
                x, y, _ = torch.Tensor([x, y, 0]) + self.origin
                x, y, theta = PerceptionCarDataset.normalize(x, y, theta)

                pose = (x, y, theta)
                image_path = os.path.join(mode_path, filename)
                # image_path = os.path.join(set_path, filename)
                self.data.append((image_path, pose))
        else:
            poses_path = os.path.join(set_path,
                                      "{}.{}.txt".format(mode, split))
            for *filenames, x, y, qw, qx, qy, qz in read_table(
                    poses_path,
                    types=(str, str, str, str, str, str, float, float, float,
                           float, float, float),
                    delimiter=" "):
                _, _, theta = euler_from_quaternion((qw, qx, qy, qz))
                assert -np.pi <= theta <= np.pi
                # Normalization: this makes x and y independent of the origin
                x, y, _ = torch.Tensor([x, y, 0]) + self.origin
                x, y, theta = PerceptionCarDataset.normalize(x, y, theta)

                pose = (x, y, np.cos(theta), np.sin(theta))
                image_paths = map(lambda fn: os.path.join(set_path, fn),
                                  filenames)
                #image_paths = filenames
                if self.only_front_camera:
                    self.data.append((next(image_paths), pose))
                else:
                    self.data.append((*image_paths, pose))

        self.size = len(self.data)
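# euler_from_quaternion is an external helper (e.g. tf.transformations). For
# the yaw extracted above, a minimal sketch of the standard formula (the name
# yaw_from_quaternion is hypothetical):
def yaw_from_quaternion(qw, qx, qy, qz):
    # rotation about z; the result lies in [-pi, pi], as the asserts expect
    return np.arctan2(2.0 * (qw * qz + qx * qy),
                      1.0 - 2.0 * (qy * qy + qz * qz))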
Example #23
import sys

import model
import memory_based
import model_based_nmf
import model_based_svd
import metrics
import split_test
import utils

import random

import time

if __name__ == '__main__':

    input_arguments = {
        "Historic Data": utils.read_table(sys.argv[1], ':', ','),
        "Prediction Data": utils.read_table(sys.argv[2], ':')
    }

    output_file = sys.argv[3]

    start = time.time()

    test = True

    #input_arguments['Historic Data'] = random.sample(input_arguments['Historic Data'], int(len(input_arguments['Historic Data']) * 0.1) )

    if test:

        for test in range(0, 10):
Example #24
    elif args.policy_name == "OurDDPG":
        policy = OurDDPG.DDPG(state_dim, action_dim, max_action)
    elif args.policy_name == "DDPG":
        policy = DDPG.DDPG(state_dim, action_dim, max_action)

    policy.load("%s" % (file_name), directory=model_dir)

    if args.save_video:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_name = video_dir + '/{}_TD3_{}.mp4'.format(
            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
            args.env_name)
        out_video = cv2.VideoWriter(video_name, fourcc, 60.0, (640, 480))
        print(video_name)

    human_joint_angle = utils.read_table()
    for i in range(1):
        obs = env.reset()
        done = False
        pre_foot_contact = 1
        foot_contact = 1
        foot_contact_vec = np.asarray([1, 1, 1])
        gait_num = 0
        joing_angle_list = []
        coe_list = []
        joint_angle = np.zeros((0, 6))
        while not done:
            action = policy.select_action(np.array(obs))
            obs, reward, done, _ = env.step(action)
            utils.fifo_list(foot_contact_vec, obs[-2])
            if 0 == np.std(foot_contact_vec):