def main():
    """Build the per-feature joined tables and then the combined table.

    Steps, in order:
      1. ``find_school_key()`` is called first.
      2. For every class in ``Database_l``: each year in ``config.year_l``
         is extracted through a cursor on the 'temp' connection, then the
         years are joined on ``ENTITY_CD`` through the 'joined' connection.
      3. For every class in ``DistrictDatabase_l``: each year is extracted
         (these ``extract`` calls take no cursor), then the years are
         joined on ``district`` through the 'joined' connection.
      4. ``join_databases`` is called with both class lists.
    """
    find_school_key()

    # Features keyed by school ID.
    for school_db_cls in Database_l:
        school_db = school_db_cls()

        temp_con = utilities.connect_to_sql('temp')
        with temp_con:
            temp_cur = temp_con.cursor()
            for year in config.year_l:
                school_db.extract(temp_cur, year)

        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, school_db.new_table_s, 'ENTITY_CD')

    # Features keyed by school district ID.
    for district_db_cls in DistrictDatabase_l:
        district_db = district_db_cls()
        for year in config.year_l:
            district_db.extract(year)

        joined_con = utilities.connect_to_sql('joined')
        with joined_con:
            joined_cur = joined_con.cursor()
            join_years(joined_cur, district_db.new_table_s, 'district')

    # Combine every feature table into one.
    final_con = utilities.connect_to_sql('joined')
    with final_con:
        final_cur = final_con.cursor()
        join_databases(final_cur, Database_l, DistrictDatabase_l)
def find_school_key():
    """Rebuild the ``school_key`` table of school IDs and names.

    The table is dropped if present and recreated from the source table
    that ``RegentsPassRate().orig_table_s_d`` maps to the last year in
    ``config.year_l``. A ``district`` column holding the first six
    characters of ``ENTITY_CD`` is then added, followed by an index on
    ``ENTITY_CD``. All statements run on the 'joined' connection.
    """
    # Restricting to SUBJECT = 'REG_ENG' is admittedly a hack. Entities
    # 240901040001 and 241001060003 are excluded because, per the original
    # author's note, they made rows multiply in the database.
    create_template_s = (
        "CREATE TABLE school_key "
        "SELECT ENTITY_CD, ENTITY_NAME "
        "FROM SRC{0:d}.`{1}` "
        "WHERE YEAR = {0:d} "
        "AND SUBJECT = 'REG_ENG' "
        "AND SUBGROUP_NAME = 'General Education' "
        "AND ENTITY_CD NOT LIKE '%0000' "
        "AND ENTITY_CD NOT LIKE '00000000000%' "
        "AND ENTITY_CD != '111111111111' "
        "AND ENTITY_CD != '240901040001' "
        "AND ENTITY_CD != '241001060003'"
    )
    follow_up_s_l = (
        "ALTER TABLE school_key ADD district CHAR(6)",
        "UPDATE school_key SET district = SUBSTRING(ENTITY_CD, 1, 6);",
        "ALTER TABLE school_key ADD INDEX ENTITY_CD (ENTITY_CD)",
    )

    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS school_key;')

        regents = RegentsPassRate()
        latest_year = config.year_l[-1]
        cur.execute(create_template_s.format(
            latest_year, regents.orig_table_s_d[latest_year]))

        for statement_s in follow_up_s_l:
            cur.execute(statement_s)
def find_school_key():
    """Create the ``school_key`` lookup table (school ID, name, district).

    Runs on the 'joined' connection: drops any existing ``school_key``,
    recreates it from the most recent year's source table (the table name
    is looked up in ``RegentsPassRate().orig_table_s_d``), adds a
    ``district`` column set to the first six characters of ``ENTITY_CD``,
    and indexes ``ENTITY_CD``.
    """
    drop_s = 'DROP TABLE IF EXISTS school_key;'
    # Selecting only SUBJECT = 'REG_ENG' rows is a known hack. The last two
    # ENTITY_CD exclusions (240901040001, 241001060003) were added by the
    # original author because those rows kept multiplying in the database.
    create_s = (
        "CREATE TABLE school_key SELECT ENTITY_CD, ENTITY_NAME"
        " FROM SRC{0:d}.`{1}`"
        " WHERE YEAR = {0:d}"
        " AND SUBJECT = 'REG_ENG'"
        " AND SUBGROUP_NAME = 'General Education'"
        " AND ENTITY_CD NOT LIKE '%0000'"
        " AND ENTITY_CD NOT LIKE '00000000000%'"
        " AND ENTITY_CD != '111111111111'"
        " AND ENTITY_CD != '240901040001'"
        " AND ENTITY_CD != '241001060003'"
    )
    add_district_s = "ALTER TABLE school_key ADD district CHAR(6)"
    fill_district_s = (
        "UPDATE school_key SET district = SUBSTRING(ENTITY_CD, 1, 6);")
    add_index_s = "ALTER TABLE school_key ADD INDEX ENTITY_CD (ENTITY_CD)"

    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        cur.execute(drop_s)

        source = RegentsPassRate()
        last_year = config.year_l[-1]
        source_table_s = source.orig_table_s_d[last_year]
        cur.execute(create_s.format(last_year, source_table_s))

        cur.execute(add_district_s)
        cur.execute(fill_district_s)
        cur.execute(add_index_s)
def plot_feature_histograms():
    """Plot and save one histogram figure per feature.

    For each name in the module-level ``database_s_l``, the columns
    ``ENTITY_CD`` and ``<name>_<year>`` (one per year in ``config.year_l``)
    are read from the 'master' table as an array. The first column is
    dropped, and for every year a 20-bin step histogram of the non-NaN
    values is drawn in the colour ``config.year_plot_color_d[year]``.
    The figure is written to ``<save_path>/<name>.png``.

    Each figure is closed once it has been saved, so figures do not pile
    up in pyplot's global registry while looping over many features.
    """
    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        for database_s in database_s_l:
            field_s_l = ['ENTITY_CD'] + [
                '{0}_{1:d}'.format(database_s, year)
                for year in config.year_l
            ]
            raw_data_a = utilities.select_data(con, cur, field_s_l, 'master',
                                               output_type='np_array')
            # Column 0 is ENTITY_CD; the remaining columns are the yearly
            # values in the same order as config.year_l.
            data_a = raw_data_a[:, 1:]
            valid_la = ~np.isnan(data_a)

            fig = plt.figure()
            try:
                ax = fig.add_subplot(111)
                for i, year in enumerate(config.year_l):
                    col_a = data_a[:, i]
                    ax.hist(col_a[valid_la[:, i]], bins=20,
                            color=config.year_plot_color_d[year],
                            histtype='step')
                ax.set_xlabel(database_s)
                ax.set_ylabel('Frequency')
                ax.ticklabel_format(useOffset=False)
                fig.savefig(os.path.join(save_path, database_s + '.png'))
            finally:
                # Release the figure even if plotting or saving failed.
                plt.close(fig)
def main(**kwargs):
    """Read in all data and run the fits/predictions over all school
    statistics separately.

    For every class in ``join_data.Database_l`` and
    ``join_data.DistrictDatabase_l``, the columns ``ENTITY_CD`` and
    ``<feature>_<year>`` (one per year in ``config.year_l``) are read from
    the 'master' table as an array and stored under the feature name.
    ``predict_a_feature`` is then called once per feature with the full
    dictionary of arrays.

    Args:
        **kwargs: Forwarded unchanged to every ``predict_a_feature`` call.
    """

    ## Read in data
    con = utilities.connect_to_sql('joined')
    with con:
        cur = con.cursor()
        data_a_d = {}
        all_Database_l = join_data.Database_l + join_data.DistrictDatabase_l
        for Database in all_Database_l:
            Instance = Database()
            feature_s = Instance.new_table_s
            field_s_l = ['ENTITY_CD'] + [
                '{0}_{1:d}'.format(feature_s, year)
                for year in config.year_l
            ]
            data_a_d[feature_s] = utilities.select_data(
                con, cur, field_s_l, 'master', output_type='np_array')

    ## Run prediction over all features
    # Iterate the dict directly: same behaviour as iterkeys() on Python 2,
    # and it keeps working on Python 3, where iterkeys() no longer exists.
    for feature_s in data_a_d:
        predict_a_feature(data_a_d, feature_s, **kwargs)
# NOTE(review): this `return ax` is the last statement of a function whose
# header lies above this excerpt; restore its indentation when merging.
return ax

# Directory where plots are written; created on import if it is missing.
save_path = os.path.join(config.plot_path, 'explore_data')
if not os.path.isdir(save_path):
    os.mkdir(save_path)

# Collect the new_table_s name of every school-level and district-level
# database class listed in join_data.
database_s_l = []
for Database in join_data.Database_l + join_data.DistrictDatabase_l:
    Instance = Database()
    database_s_l.append(Instance.new_table_s)

# Map each name to its array of yearly values read from the 'master'
# table, with the leading ENTITY_CD column removed.
data_a_d = {}
con = utilities.connect_to_sql('joined')
with con:
    cur = con.cursor()
    for database_s in database_s_l:
        # One column per year in config.year_l, named <name>_<year>.
        field_s_l = ['ENTITY_CD'] + \
            ['{0}_{1:d}'.format(database_s, year) for year in config.year_l]
        raw_data_a = utilities.select_data(con, cur, field_s_l, 'master',
                                           output_type='np_array')
        data_a_d[database_s] = raw_data_a[:, 1:]

# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()