def _append_frame(df, table, schema, host, on_error):
    """Try to append ``df`` to ``table``; print ``on_error`` if the write raises."""
    try:
        with sclsql.connection(
                schema, sclsql.ConnectionParameters(host)) as connection:
            df.to_sql(table, connection, flavor='mysql', index=False,
                      if_exists='append', chunksize=2000)
    except Exception:
        # NOTE(review): the status text is printed when the write *raises*,
        # exactly as the original did - confirm that this is intended.
        print(on_error)


def _refresh_privileges(script, table, schema):
    """Run ``script table schema`` through the shell, or report a missing script."""
    # os.system() does not raise when the command is missing (the shell just
    # returns a non-zero status), so the script's presence is checked here.
    if not os.path.isfile(script):
        print('Script for refreshing privileges was not found')
        return
    # NOTE(review): table and schema are interpolated into a shell command
    # unquoted; they must come from trusted callers.
    os.system("{} {} {}".format(script, table, schema))


def write_table(df, table, schema, host):
    """
    Append a data frame to a database table, refreshing the table
    privileges between two write attempts.

    The steps are, in order:

    1. try to append ``df`` to ``table`` (chunks of 2000 rows, no index);
    2. run the two privilege-refresh scripts under ``/opt`` with ``table``
       and ``schema`` as arguments;
    3. read up to 1000 rows back with ``readsql``; if none come back, try
       the append again, otherwise report that the table already held data.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source; the steps are assumed to be sequential.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to append.
    table : string
        Target table name.
    schema : string
        Database passed to ``sclsql.connection``.
    host : string
        Server passed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    None
        Progress is reported through ``print`` only.
    """
    _append_frame(df, table, schema, host,
                  'Table {} initialised'.format(table))

    _refresh_privileges("/opt/refresh-rnc-table-privileges", table, schema)
    _refresh_privileges("/opt/refresh-mysql-table-privileges", table, schema)

    sql = 'select * from {} limit 1000'.format(table)
    existing = readsql(schema, sql, server=host)
    if existing.shape[0] == 0:
        _append_frame(df, table, schema, host,
                      'Table {} writted'.format(table))
    else:
        print('Table {} was alraedy initialised'.format(table))
def get_count(self, state, election_id=None, el_type=None):
    """
    Count ``voter2elections`` rows for one election, or for every election
    of a given type.

    The server is taken from the free name ``host``, which has to be
    supplied by the enclosing scope.

    Parameters
    ----------
    state : string
        Key into ``sd.state2db_v4`` / ``sd.state2statedb`` (and into
        ``sd.state2abrv`` when ``el_type`` is used).
    election_id : int, optional
        When an integer is given, the row count for that election is
        returned and ``el_type`` is ignored.
    el_type : string, optional
        Name of the attribute, on the object stored under the state's
        abbreviation, that holds a table with an 'election_id' column.
        That table gains two columns: 'counts' (all rows per election) and
        'results' (rows whose ``eresult`` is not null).

    Returns
    -------
    count or None
        The count when ``election_id`` is an integer, otherwise None.

    Raises
    ------
    ValueError
        If ``self.source`` is neither 'v4' nor 'v3'.
    """
    import numbers

    if self.source == 'v4':
        database = sd.state2db_v4[state]
    elif self.source == 'v3':
        database = sd.state2statedb[state]
    else:
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError(
            "Unsupported source {!r}; expected 'v4' or 'v3'".format(
                self.source))

    def count_rows(election, condition=''):
        # One short-lived connection per query, as before.
        query = ("select count(*) as count from voter2elections "
                 "where election_id = {}{}".format(election, condition))
        with sclsql.connection(
                database, sclsql.ConnectionParameters(host)) as connection:
            return pd.read_sql(query, connection)['count'].values[0]

    # Accept numpy integers too, but keep rejecting booleans.
    single_election = (isinstance(election_id, numbers.Integral)
                       and not isinstance(election_id, bool))
    if single_election:
        return count_rows(election_id)
    if el_type is None:
        return None

    table = getattr(getattr(self, sd.state2abrv[state]), el_type)
    counts = []
    counts_not_null = []
    for current_id in table['election_id'].tolist():
        counts.append(count_rows(current_id))
        counts_not_null.append(
            count_rows(current_id, ' and eresult is not null'))
    table['counts'] = counts
    table['results'] = counts_not_null
    return None
def readsql(database, query, server="scls2", state=None, dedup=False,
            print_time=False):
    """
    Run ``query`` against ``database`` and return the result as a frame.

    Parameters
    ----------
    database : string
        Database passed to ``sclsql.connection``.
    query : string
        SQL handed to ``pandas.read_sql``.
    server : string
        Server passed to ``sclsql.ConnectionParameters``.
    state : string, optional
        Key into ``sd.state2fips``. When given and the result has a
        'voter_id' column, that column is replaced by an integer 'uid'
        index computed as ``voter_id + fips * 1e9``.
    dedup : bool
        When true and the result has a 'voter_id' column, keep only the
        last row of every voter_id. This runs before the 'uid' conversion,
        so it also applies when ``state`` is given.
    print_time : bool
        Print the elapsed wall-clock time of the query plus the
        post-processing.

    Returns
    -------
    pandas.DataFrame
    """
    started = time.time()
    with sclsql.connection(
            database, sclsql.ConnectionParameters(server)) as connection:
        df = pd.read_sql(query, connection)

    if 'voter_id' in df.columns:
        # De-duplicate while voter_id still exists; the uid step drops it.
        if dedup:
            df.drop_duplicates(subset='voter_id', inplace=True, keep='last')
        if state is not None:
            df['uid'] = [int(voter_id + sd.state2fips[state] * 1e9)
                         for voter_id in df.voter_id]
            df.set_index('uid', inplace=True)
            df.drop('voter_id', axis=1, inplace=True)

    if print_time:
        # Single pre-formatted argument: same output on Python 2 and 3.
        print("Time taken = {}".format(time.time() - started))
    return df
def get_elections_info(source='v4', yield_dict=False, host='scls2'):
    """
    Collect past elections' info for every state into a dictionary and
    make it available as nested ``Elections`` objects.

    Parameters
    ----------
    source: string
        Database generation to read from: 'v4' (``sd.state2db_v4``, states
        matched by abbreviation) or 'v3' (``sd.state2statedb``, states
        matched by their ``OURCODE`` value).
    yield_dict: bool
        Output the plain dictionary instead of the ``Elections`` wrapper.
    host: string
        Server passed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    elections_dict : nested class instances, (or dictionary)
        Keyed by state abbreviation. Each state holds a 'type' frame
        (columns 'etype', 'election_id') plus one frame per election type
        (columns 'election_id', 'date', 'ename') ordered by election year.
        A state whose lookup fails keeps an empty entry.

    Raises
    ------
    ValueError
        If ``source`` is neither 'v4' nor 'v3'.
    """
    import logging
    import numbers

    if source not in ('v4', 'v3'):
        raise ValueError(
            "Unsupported source {!r}; expected 'v4' or 'v3'".format(source))

    log = logging.getLogger(__name__)
    etype_names = {1: 'General',
                   2: 'Special',
                   3: 'Local_or_Municipal',
                   4: 'Primary',
                   5: 'Consolidated_General',
                   6: 'Runoff',
                   7: 'Consolidated_Primary',
                   8: 'Presidential_Primary',
                   9: 'Governor_Primary',
                   10: 'Republican_Caucus'}

    elections_dict = {}
    ## California is not available
    for state in [i.name for i in sd.state_population_order]:
        st = OURCODE[state]
        sabr = sd.state2abrv[state]
        elections_dict[sabr] = {}
        try:
            if source == 'v4':
                query = """select * from elections where state = '{}' """.format(
                    sabr)
                database = sd.state2db_v4[state]
            else:
                query = """select * from elections where state = '{}' """.format(
                    st)
                database = sd.state2statedb[state]
            with sclsql.connection(
                    database, sclsql.ConnectionParameters(host)) as connection:
                election = pd.read_sql(query, connection)

            # Numeric etype codes become labels; unknown codes become NaN.
            election['etype'] = election['etype'].map(etype_names)
            years = election['edate'].apply(lambda x: x.year)

            tables = {}
            # NOTE(review): this pairs the i-th distinct etype with the i-th
            # distinct election_id, as the original did; the pairing carries
            # no per-type meaning - confirm the intent.
            tables['type'] = pd.DataFrame(
                list(zip(election['etype'].unique().tolist(),
                         election['election_id'].unique().tolist())),
                columns=['etype', 'election_id'])

            for label in election['etype'].unique():
                mask = election['etype'] == label
                subset = election[mask]
                # Ordered by year only, like the original.
                order = np.argsort(years[mask].tolist())
                tables[label] = pd.DataFrame(
                    list(zip(subset['election_id'].values[order],
                             subset['edate'].values[order],
                             subset['ename'].values[order])),
                    columns=['election_id', 'date', 'ename'])

            elections_dict[sabr] = tables
        except Exception as exc:
            # Best effort: an unavailable state is skipped on purpose, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            log.debug("No elections info for %s: %s", sabr, exc)

    class Elections(object):
        """
        Class that transforms nested dictionaries into an object
        to store info on elections.
        """

        def __init__(self, adict):
            self.__dict__.update(adict)
            for k, v in adict.items():
                if isinstance(v, dict):
                    self.__dict__[k] = Elections(v)
            self.source = source

        def __getitem__(self, key):
            return getattr(self, key)

        def get_count(self, state, election_id=None, el_type=None):
            """
            Count ``voter2elections`` rows for one election (integer
            ``election_id``; the count is returned) or for every election
            listed under ``el_type`` for the state (the per-type frame
            gains 'counts' and 'results' columns; None is returned).
            """
            if self.source == 'v4':
                database = sd.state2db_v4[state]
            elif self.source == 'v3':
                database = sd.state2statedb[state]
            else:
                raise ValueError(
                    "Unsupported source {!r}; expected 'v4' or 'v3'".format(
                        self.source))

            def count_rows(election, condition=''):
                query = ("select count(*) as count from voter2elections "
                         "where election_id = {}{}".format(
                             election, condition))
                with sclsql.connection(
                        database,
                        sclsql.ConnectionParameters(host)) as connection:
                    return pd.read_sql(query, connection)['count'].values[0]

            # Accept numpy integers too, but keep rejecting booleans.
            single_election = (isinstance(election_id, numbers.Integral)
                               and not isinstance(election_id, bool))
            if single_election:
                return count_rows(election_id)
            if el_type is None:
                return None

            table = getattr(getattr(self, sd.state2abrv[state]), el_type)
            counts = []
            counts_not_null = []
            for current_id in table['election_id'].tolist():
                counts.append(count_rows(current_id))
                counts_not_null.append(
                    count_rows(current_id, ' and eresult is not null'))
            table['counts'] = counts
            table['results'] = counts_not_null
            return None

    if yield_dict:
        return elections_dict
    return Elections(elections_dict)
def get_models_info(source='v4', yield_dict=False, host='scls2'):
    """
    Collect the model types registered for every state into a dictionary
    and make it available as nested ``Models`` objects.

    Parameters
    ----------
    source: string
        Database from which information are retrieved: 'aristotle' reads
        ``armodeltypes`` from ``sd.state2db_v4``; 'v3' reads ``modeltypes``
        from ``sd.state2statedb``. 'v4' is treated like 'aristotle'.
    yield_dict: bool
        Output the plain dictionary instead of the ``Models`` wrapper.
    host: string
        Server passed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    models_dict : nested class instances, (or dictionary)
        Keyed by state abbreviation. Each state holds a 'type' frame
        (columns 'category', 'model_id': the category label and its model
        ids joined by spaces) plus one frame per category label (spaces
        replaced by underscores) ordered by ``date_added``.

    Raises
    ------
    ValueError
        If ``source`` is not one of 'v4', 'aristotle' or 'v3'.
    """
    if source not in ('v4', 'aristotle', 'v3'):
        raise ValueError(
            "Unsupported source {!r}; expected 'v4', 'aristotle' or "
            "'v3'".format(source))
    # NOTE(review): the default 'v4' matched no branch in the original and
    # failed with a NameError; it is routed to the 'aristotle' query because
    # that branch reads from sd.state2db_v4 - confirm.
    use_aristotle = source != 'v3'

    def day_number(stamp):
        # Approximate day count since 2014, used only as a sort key.
        return (stamp.year - 2014) * 365 + stamp.dayofyear

    detail_columns = ['model_id', 'date_added', 'name', 'type', 'version',
                      'author', 'permissions']

    models_dict = {}
    for state in sd.state_name_population_order:
        sabr = sd.state2abrv[state]
        models_dict[sabr] = {}
        if use_aristotle:
            query = """select * from armodeltypes"""
            database = sd.state2db_v4[state]
        else:
            query = """select * from modeltypes"""
            database = sd.state2statedb[state]
        with sclsql.connection(
                database, sclsql.ConnectionParameters(host)) as connection:
            models = pd.read_sql(query, connection)

        summary = []
        per_category = {}
        # A scalar key keeps the group labels plain strings on every
        # pandas version.
        for category, group in models.groupby('category'):
            label = category.replace(' ', '_')
            model_ids = ' '.join(
                map(str, group['model_id'].unique().tolist()))
            summary.append((label, model_ids))

            order = np.argsort(
                [day_number(d) for d in group['date_added'].tolist()])
            ordered = [group[column].values[order]
                       for column in detail_columns]
            # The 'ename' column name is kept for backward compatibility.
            per_category[label] = pd.DataFrame(
                list(zip(*ordered)),
                columns=['model_id', 'date', 'ename', 'type', 'version',
                         'author', 'permissions'])

        state_entry = {'type': pd.DataFrame(
            summary, columns=['category', 'model_id'])}
        state_entry.update(per_category)
        models_dict[sabr] = state_entry

    class Models(object):
        """
        Class that transforms nested dictionaries into an object
        to store info on models.
        """

        def __init__(self, adict):
            self.__dict__.update(adict)
            for k, v in adict.items():
                if isinstance(v, dict):
                    self.__dict__[k] = Models(v)
            self.source = source

        def __getitem__(self, key):
            return getattr(self, key)

    if yield_dict:
        return models_dict
    return Models(models_dict)