# ('Title', 'categorical'): titles, # ('Ticket_Code', 'categorical'): ticket_codes, # ('Ticket_Val', 'continuous'): None, } target_var = ["Age", "continuous"] # Only build a tree if necessary. if build_tree: # Build tree to predict age: f = file("data/%s_age.csv" % filename, "r") max_depth = 6 root = construct(f, ind_vars, target_var, max_depth) f.close() f = file("trees/age.tree", "w") write_tree(root, f) f.close() # Trim tree: TODO # Use tree to predict age: # Compile list of independent variables used to predict target variable tree = read_tree("trees/age.tree") f = file("data/%s_no_age.csv" % filename, "r") ind_vars[("PassengerId", "continuous")] = None data = get_data(f, ind_vars) var_dict = simplify_var_dict(ind_vars, None) # Output target variable predictions to csv. f = file("predictions/ages.csv", "w") f.write("PassengerId,%s\n" % target_var[0])
pass elif target_var and target_var[1] == 'continuous': datum.append(float(line[var_dict[target_var[0]]])) elif target_var: raise Exception('Invalid variable type: %s' % target_var[1]) return datum if __name__ == '__main__': f = file('data/train_titles.csv', 'r') ind_vars = { ('Sex', 'categorical'): ['male', 'female'], ('Pclass', 'categorical'): ['1', '2', '3'], ('Embarked', 'categorical'): ['S', 'C', 'Q'], ('Title', 'categorical'): titles, ('Ticket_Code', 'categorical'): ticket_codes, ('SibSp', 'continuous'): None, ('Parch', 'continuous'): None, ('Fare', 'continuous'): None, ('Ticket_Val', 'continuous'): None, } target_var = ['Survived', 'categorical', '0', '1'] max_depth = 100 tree = construct(f, ind_vars, target_var, max_depth) f.close() f = file('trees/temp.tree', 'w') write_tree(tree, f) f.close()