def get_data(f, ind_vars):
    """
    Reads f and returns a list with one datum per data row.

    The first row produced by reader(f) is treated as the header and is
    turned into a variable dictionary via make_variable_dictionary. Every
    remaining row is passed through get_datum with no target variable
    (None), so only the independent variables are requested.

    f        -- input handed to reader(); presumably an open csv file
                (TODO confirm against callers).
    ind_vars -- the independent variables of interest, forwarded to
                get_datum unchanged.

    Raises StopIteration if reader(f) yields no header row.
    """
    lines = reader(f)
    # Builtin next() instead of the Python 2-only .next() method, so the
    # header read works with both Python 2.6+ and Python 3 iterators.
    variables = next(lines)
    var_dict = make_variable_dictionary(variables)

    # Collect all of the data, but only the variables of interest.
    return [get_datum(line, var_dict, ind_vars, None) for line in lines]
def construct(f, ind_vars, target_var, max_depth):
    """
    Given a csv data file 'f', a list of independent variables, and the
    target variable, a tree is built and returned.

    The first row produced by reader(f) is treated as the header; every
    remaining row is converted with get_datum, keeping the independent
    variables and the target variable. The collected data is then handed
    to make_tree together with a simplified variable dictionary, max_depth
    and the total number of data rows.

    f          -- input handed to reader(); presumably an open csv file
                  (TODO confirm against callers).
    ind_vars   -- the independent variables of interest.
    target_var -- the variable the tree is built for.
    max_depth  -- forwarded to make_tree unchanged.

    Returns whatever make_tree returns.
    Raises StopIteration if reader(f) yields no header row.
    """
    lines = reader(f)
    # Builtin next() instead of the Python 2-only .next() method, so the
    # header read works with both Python 2.6+ and Python 3 iterators.
    variables = next(lines)
    var_dict = make_variable_dictionary(variables)

    # Collect all of the data, but only the variables of interest.
    data = [get_datum(line, var_dict, ind_vars, target_var) for line in lines]

    # Simplify variable dictionary: the header-based dictionary is only
    # needed while reading rows; make_tree receives the simplified one.
    var_dict = simplify_var_dict(ind_vars, target_var)

    return make_tree(data, ind_vars, target_var, var_dict, max_depth,
                     len(data))