Пример #1
0
def write_table(df, table, schema, host):
    """
    Append ``df`` to ``table`` in ``schema`` on ``host`` and refresh the
    table's privileges.

    The frame is written once, the two privilege-refresh scripts under
    ``/opt`` are run, and the table is then read back; if it is still empty
    the write is attempted a second time.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to append.
    table : string
        Name of the destination table.
    schema : string
        Database handed to ``sclsql.connection``.
    host : string
        Server name handed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    None
        Progress and failures are reported with ``print``.
    """
    # Local import so this function does not rely on the file's import list.
    import subprocess

    def append_frame():
        with sclsql.connection(schema, sclsql.ConnectionParameters(host)) as connection:
            df.to_sql(table, connection, flavor='mysql', index=False,
                      if_exists='append', chunksize=2000)

    # Best effort: a failed first write is reported, not raised, because the
    # read-back below decides whether a second attempt is needed.
    try:
        append_frame()
    except Exception as exc:
        print('Table {} could not be written: {}'.format(table, exc))
    else:
        print('Table {} initialised'.format(table))

    # subprocess.call raises OSError when the script cannot be executed and
    # returns its exit status otherwise; os.system did neither visibly, so a
    # missing script used to go unnoticed.
    for script in ('/opt/refresh-rnc-table-privileges',
                   '/opt/refresh-mysql-table-privileges'):
        try:
            status = subprocess.call([script, table, schema])
        except OSError as exc:
            print('Script for refreshing privileges was not found: {}'.format(exc))
        else:
            if status != 0:
                print('{} exited with status {}'.format(script, status))

    sql = 'select * from {} limit 1000'.format(table)
    t = readsql(schema, sql, server=host)

    if t.shape[0] == 0:
        # Nothing landed in the table: try the write once more.
        try:
            append_frame()
        except Exception as exc:
            print('Table {} could not be written: {}'.format(table, exc))
        else:
            print('Table {} writted'.format(table))
    else:
        print('Table {} was alraedy initialised'.format(table))
Пример #2
0
		def get_count(self, state, election_id=None, el_type=None):
			"""
			Count ``voter2elections`` rows for one election, or for every
			election of a given type.

			Parameters
			----------
			state : string
				Key into ``sd.state2db_v4`` / ``sd.state2statedb`` and
				``sd.state2abrv``.
			election_id : int, optional
				When given, the row count for this election is returned.
			el_type : string, optional
				Used when ``election_id`` is not an int: name of the attribute
				(on the state's object) holding a frame with an
				``election_id`` column.

			Returns
			-------
			count or None
				The row count when ``election_id`` is an int. Otherwise
				``None``; the counts are stored in the ``counts`` and
				``results`` (rows with non-null ``eresult``) columns of the
				``el_type`` frame.

			Raises
			------
			ValueError
				If ``self.source`` is neither 'v4' nor 'v3'.

			Notes
			-----
			``host`` is not a parameter; it is read from the enclosing scope.
			"""
			if self.source == 'v4':
				database = sd.state2db_v4[state]
			elif self.source == 'v3':
				database = sd.state2statedb[state]
			else:
				raise ValueError("unsupported source: {!r}".format(self.source))

			count_sql = "select count(*) as count from voter2elections where election_id = {}"

			if isinstance(election_id, int):
				with sclsql.connection(database, sclsql.ConnectionParameters(host)) as connection:
					df = pd.read_sql(count_sql.format(election_id), connection)
				return df['count'].values[0]

			if el_type is None:
				return None

			frame = getattr(getattr(self, sd.state2abrv[state]), el_type)
			not_null_sql = count_sql + " and eresult is not null"

			counts = []
			counts_not_null = []

			# One connection serves all queries instead of reconnecting twice
			# per election.
			with sclsql.connection(database, sclsql.ConnectionParameters(host)) as connection:
				for i in frame['election_id'].tolist():
					df = pd.read_sql(count_sql.format(i), connection)
					counts.append(df['count'].values[0])

					df = pd.read_sql(not_null_sql.format(i), connection)
					counts_not_null.append(df['count'].values[0])

			frame['counts'] = counts
			frame['results'] = counts_not_null
Пример #3
0
def readsql(database, query, server="scls2", state=None, dedup=False,
            print_time=False):
    """
    Run ``query`` against ``database`` on ``server`` and return the result
    as a DataFrame.

    Parameters
    ----------
    database : string
        Database handed to ``sclsql.connection``.
    query : string
        SQL text handed to ``pandas.read_sql``.
    server : string
        Server name handed to ``sclsql.ConnectionParameters``.
    state : string, optional
        Key into ``sd.state2fips``. When given and the result has a
        ``voter_id`` column, that column is replaced by an integer ``uid``
        index computed as ``voter_id + fips * 1e9``.
    dedup : bool
        When true and the result has a ``voter_id`` column, keep only the
        last row for each ``voter_id``.
    print_time : bool
        Print the time spent on post-processing. The timer starts after the
        query has returned, so the query itself is not included.

    Returns
    -------
    pandas.DataFrame
    """
    with sclsql.connection(database, sclsql.ConnectionParameters(server)) as connection:
        df = pd.read_sql(query, connection)

    T = time.time()
    has_voter_id = 'voter_id' in df.columns

    # De-duplicate before voter_id is turned into the index: doing it
    # afterwards meant dedup was silently skipped whenever state was given.
    if has_voter_id and dedup:
        df.drop_duplicates(subset='voter_id', inplace=True, keep='last')

    if has_voter_id and state is not None:
        offset = sd.state2fips[state] * 1e9
        df['uid'] = [int(i + offset) for i in df.voter_id]
        df.set_index('uid', inplace=True)
        df.drop('voter_id', axis=1, inplace=True)

    if print_time:
        # Single formatted argument so the output is the same whether print
        # is a statement or a function.
        print("Time taken = {}".format(time.time() - T))
    return df
Пример #4
0
def get_elections_info(source='v4', yield_dict=False, host='scls2'):
    """
    Collect past elections' info per state into a dictionary and make it
    available as nested objects.

    For every state in ``sd.state_population_order`` the ``elections`` table
    of that state's database is read. A state whose data cannot be loaded is
    reported with ``print`` and left with whatever was stored before the
    failure (an empty dictionary if nothing was).

    Parameters
    ----------
    source : string
        'v4' or 'v3'; selects the database mapping
        (``sd.state2db_v4`` / ``sd.state2statedb``). For 'v3' the integer
        ``etype`` codes are mapped to labels.

    yield_dict : bool
        Return the plain dictionary instead of the nested objects.

    host : string
        Server name handed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    elections_dict : nested class instances, (or dictionary)
        Keyed by state abbreviation. Each state holds a ``type`` frame
        (columns ``etype``, ``election_id``) and one frame per election type
        (columns ``election_id``, ``date``, ``ename``) ordered by year.

    Raises
    ------
    ValueError
        If ``source`` is neither 'v4' nor 'v3'.
    """
    if source not in ('v4', 'v3'):
        raise ValueError("unsupported source: {!r}".format(source))

    # v3 stores the election type as an integer code.
    v3_etype_labels = {1: 'General',
                       2: 'Special',
                       3: 'Local_or_Municipal',
                       4: 'Primary',
                       5: 'Consolidated_General',
                       6: 'Runoff',
                       7: 'Consolidated_Primary',
                       8: 'Presidential_Primary',
                       9: 'Governor_Primary',
                       10: 'Republican_Caucus'}

    elections_dict = {}

    ## California is not available

    for state in [i.name for i in sd.state_population_order]:

        st = OURCODE[state]
        sabr = sd.state2abrv[state]
        state_info = elections_dict[sabr] = {}

        try:
            if source == 'v4':
                state_key = sabr
                database = sd.state2db_v4[state]
            else:
                state_key = st
                database = sd.state2statedb[state]

            query = """select * from elections where state = '{}' """.format(state_key)
            with sclsql.connection(database, sclsql.ConnectionParameters(host)) as connection:
                election = pd.read_sql(query, connection)

            if source == 'v3':
                election['etype'] = election['etype'].map(v3_etype_labels)

            election['type'] = election['etype']
            election['year'] = election['edate'].apply(lambda x: x.year).tolist()

            # NOTE(review): unique etypes and unique election ids are paired
            # by position, as before; confirm this table is meant to be a
            # row-wise mapping.
            etypes = election['etype'].unique().tolist()
            election_ids = election['election_id'].unique().tolist()
            state_info['type'] = pd.DataFrame(list(zip(etypes, election_ids)),
                                              index=range(len(etypes)),
                                              columns=['etype', 'election_id'])

            for etype in election['type'].unique():
                rows = election[election['type'] == etype]
                # Stable sort: elections of the same year keep table order.
                order = np.argsort(rows['year'].tolist(), kind='mergesort')
                state_info[etype] = pd.DataFrame(
                    list(zip(rows['election_id'].values[order],
                             rows['edate'].values[order],
                             rows['ename'].values[order])),
                    index=range(rows.shape[0]),
                    columns=['election_id', 'date', 'ename'])

        except Exception as exc:
            # Best effort per state: report the failure and carry on.
            print('Elections for {} could not be loaded: {}'.format(sabr, exc))

    class Elections(object):

        """
        Class that transforms nested dictionaries into an
        object to store info on elections.
        """

        def __init__(self, adict):

            self.__dict__.update(adict)
            for k, v in adict.items():
                if isinstance(v, dict):
                    self.__dict__[k] = Elections(v)

            self.source = source

        def __getitem__(self, key):
            return getattr(self, key)

        def get_count(self, state, election_id=None, el_type=None):
            """
            Count ``voter2elections`` rows for one election, or for every
            election of type ``el_type`` in ``state``.

            With an int ``election_id`` the count is returned. Otherwise,
            when ``el_type`` is given, the counts are stored in the
            ``counts`` and ``results`` (rows with non-null ``eresult``)
            columns of that type's frame and ``None`` is returned.
            """
            if self.source == 'v4':
                database = sd.state2db_v4[state]
            elif self.source == 'v3':
                database = sd.state2statedb[state]
            else:
                raise ValueError("unsupported source: {!r}".format(self.source))

            count_sql = "select count(*) as count from voter2elections where election_id = {}"

            if isinstance(election_id, int):
                with sclsql.connection(database, sclsql.ConnectionParameters(host)) as connection:
                    df = pd.read_sql(count_sql.format(election_id), connection)
                return df['count'].values[0]

            if el_type is None:
                return None

            frame = getattr(getattr(self, sd.state2abrv[state]), el_type)
            not_null_sql = count_sql + " and eresult is not null"

            counts = []
            counts_not_null = []

            # One connection serves all queries instead of reconnecting
            # twice per election.
            with sclsql.connection(database, sclsql.ConnectionParameters(host)) as connection:
                for i in frame['election_id'].tolist():
                    df = pd.read_sql(count_sql.format(i), connection)
                    counts.append(df['count'].values[0])

                    df = pd.read_sql(not_null_sql.format(i), connection)
                    counts_not_null.append(df['count'].values[0])

            frame['counts'] = counts
            frame['results'] = counts_not_null

    if yield_dict:
        return elections_dict

    return Elections(elections_dict)
Пример #5
0
def get_models_info(source='v4', yield_dict=False, host='scls2'):
    """
    Collect model metadata per state into a dictionary and make it
    available as nested objects.

    For every state in ``sd.state_name_population_order`` the model-types
    table of that state's database is read and split by ``category``.

    Parameters
    ----------
    source : string
        'v4' or 'aristotle' reads ``armodeltypes`` from the
        ``sd.state2db_v4`` databases; 'v3' reads ``modeltypes`` from the
        ``sd.state2statedb`` databases.
        NOTE(review): 'v4' is treated as an alias of 'aristotle' because the
        default value previously matched no branch; confirm.

    yield_dict : bool
        Return the plain dictionary instead of the nested objects.

    host : string
        Server name handed to ``sclsql.ConnectionParameters``.

    Returns
    -------
    models_dict : nested class instances, (or dictionary)
        Keyed by state abbreviation. Each state holds a ``type`` frame
        (columns ``category``, ``model_id``) and one frame per category,
        keyed by the category with spaces replaced by underscores and
        ordered by ``date_added``.

    Raises
    ------
    ValueError
        If ``source`` is not one of 'v4', 'aristotle', 'v3'.
    """
    if source in ('v4', 'aristotle'):
        query = """select * from armodeltypes"""
        databases = sd.state2db_v4
    elif source == 'v3':
        query = """select * from modeltypes"""
        databases = sd.state2statedb
    else:
        raise ValueError("unsupported source: {!r}".format(source))

    # Sort key for date_added: approximate day count since the start of 2014.
    day_number = lambda x: (x.year - 2014) * 365 + x.dayofyear

    # (source column, output column) pairs of the per-category frames.
    column_pairs = [('model_id', 'model_id'),
                    ('date_added', 'date'),
                    ('name', 'ename'),
                    ('type', 'type'),
                    ('version', 'version'),
                    ('author', 'author'),
                    ('permissions', 'permissions')]

    models_dict = {}

    for state in sd.state_name_population_order:

        sabr = sd.state2abrv[state]

        with sclsql.connection(databases[state], sclsql.ConnectionParameters(host)) as connection:
            models = pd.read_sql(query, connection)

        # Group by the column name, not a one-element list, so the group
        # keys are plain category values on every pandas version.
        categories = []
        id_strings = []
        for category, group in models.groupby('category'):
            categories.append(category)
            id_strings.append(' '.join(map(str, group['model_id'].unique().tolist())))

        labels = ['_'.join(c.split(' ')) for c in categories]

        state_info = models_dict[sabr] = {}

        # The index is sized from the grouped categories so it always
        # matches the number of rows.
        state_info['type'] = pd.DataFrame(list(zip(labels, id_strings)),
                                          index=range(len(labels)),
                                          columns=['category', 'model_id'])

        for label, category in zip(labels, categories):
            rows = models[models['category'] == category]
            # Stable sort: models added on the same day keep table order.
            order = np.argsort([day_number(d) for d in rows['date_added'].tolist()],
                               kind='mergesort')
            ordered = [rows[src].values[order] for src, _ in column_pairs]
            state_info[label] = pd.DataFrame(list(zip(*ordered)),
                                             index=range(rows.shape[0]),
                                             columns=[out for _, out in column_pairs])

    class Models(object):

        """
        Class that transforms nested dictionaries into an
        object to store info on models.
        """

        def __init__(self, adict):

            self.__dict__.update(adict)
            for k, v in adict.items():
                if isinstance(v, dict):
                    self.__dict__[k] = Models(v)

            self.source = source

        def __getitem__(self, key):
            return getattr(self, key)

    if yield_dict:
        return models_dict

    return Models(models_dict)