def _quote_sql_literal(value):
    """Return ``value`` rendered as a single-quoted SQL string literal.

    Backslashes and single quotes are escaped so that a request-supplied
    value cannot terminate the literal early and inject SQL.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'%s'" % escaped


def request_candidates_bills():
    """Return, as a JSON string, the bills a candidate voted on.

    Reads two query-string arguments from the current request:

    * ``VoteSmartCandID`` -- the candidate id to match.
    * ``VoteSmartPrimaryCategoryId`` -- one or more category ids separated
      by commas; a bill matches if it belongs to any of them.

    Returns:
        A JSON document (indented, keys sorted) holding the result of
        ``convert_bills_response_2_detailed_dict`` for the matching rows,
        or an empty JSON list when the query yields nothing or a required
        argument is missing.
    """
    cand_id = request.args.get("VoteSmartCandID")
    category_id = request.args.get("VoteSmartPrimaryCategoryId")
    print("Candidate ID: %s " % cand_id)

    bills_list = []

    # A missing argument used to crash on ``None.strip`` (category) or to
    # query for the literal string 'None' (candidate); answer with an empty
    # result instead.
    if cand_id is None or category_id is None:
        return json.dumps(bills_list, indent=4, sort_keys=True, default=str)

    categories = category_id.strip(',').split(',')
    print("Category ID: %s " % categories)

    # SQL Retriever
    poly_voting_record_sql_retriever = SqlStorer(
        db_name=cand_bills_db_name, table_name=cand_bills_table_name)
    poly_voting_record_sql_retriever.set_up_connection()

    # One equality test per requested category, OR-ed together.
    # NOTE(review): the values come straight from the request, so they are
    # escaped before being embedded. If ``execute_raw_query`` can accept
    # bound parameters (not visible here), prefer that over escaping.
    category_clause = " OR ".join(
        "B.VoteSmartPrimaryCategoryId=%s" % _quote_sql_literal(category)
        for category in categories
    )

    raw_query = (
        " SELECT B.*,V.* FROM PoliticianInfo.bill_info_table B"
        " inner join PoliticianInfo.politician_voting_record_table V ON"
        " B.VoteSmartBillId = V.VoteSmartBillId"
        " where V.VoteSmartCandID=%s and (%s) "
        "ORDER BY `DateIntroduced`"
    ) % (_quote_sql_literal(cand_id), category_clause)

    response = poly_voting_record_sql_retriever.execute_raw_query(
        raw_query, verbose=False)

    if response:
        bills_list = convert_bills_response_2_detailed_dict(response)

    # ``default=str`` stringifies datetime values that json cannot serialize:
    # https://stackoverflow.com/questions/11875770/how-to-overcome-datetime-datetime-not-json-serializable/36142844#36142844
    return json.dumps(bills_list, indent=4, sort_keys=True, default=str)
def prime_raw_dataset_from_sql(topic_name="abortion"):
    """Load the IMDB subword dataset and encode SQL-sourced bill text.

    The function loads ``imdb_reviews/subwords8k`` through ``tfds`` to obtain
    its train/test splits and its text encoder, pulls the Republican and
    Democrat result sets for ``topic_name`` from the ML bill table, merges
    them with ``prime_sql_results`` and encodes every unique, non-empty
    ``content`` value with the dataset's encoder.

    Args:
        topic_name: Value substituted into ``sql_republican_weight_request``
            and ``sql_democrat_weight_request``.

    Returns:
        A tuple ``(train_dataset, test_dataset, encoded_entry_list,
        label_list, encoder)``. ``encoded_entry_list[i]`` and
        ``label_list[i]`` always describe the same entry.
    """
    global ml_sql_retrieve

    dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
                              as_supervised=True)
    encoder = info.features['text'].encoder

    # Create the shared SQL connection on first use only.
    if ml_sql_retrieve is None:
        ml_sql_retrieve = SqlStorer(db_name="BillInfoDB",
                                    table_name="ml_bill_info_table")
        ml_sql_retrieve.set_up_connection()

    # Pull Republican results
    republican_results = ml_sql_retrieve.execute_raw_query(
        raw_query_str=(sql_republican_weight_request % topic_name))

    # Pull Democrat results
    democrat_results = ml_sql_retrieve.execute_raw_query(
        raw_query_str=(sql_democrat_weight_request % topic_name))

    # Primed results ready
    primed_results = prime_sql_results(rep_sql_res=republican_results,
                                       dem_sql_res=democrat_results)

    train_dataset, test_dataset = dataset['train'], dataset['test']

    label_list = []
    encoded_entry_list = []
    seen_digests = set()

    for entry in primed_results:
        # Read both fields before touching any output list: previously a
        # missing "label" was only detected after the encoded content had
        # been appended, leaving the two lists misaligned.
        try:
            content = entry["content"]
            label = entry["label"]
        except KeyError:
            continue

        if not content:
            continue

        # Hash the content so duplicate texts are only added once.
        digest = hashlib.md5(content.encode('utf-8')).hexdigest()
        if digest in seen_digests:
            continue
        seen_digests.add(digest)

        encoded_entry_list.append(encoder.encode(content))
        label_list.append(label)

    return train_dataset, test_dataset, encoded_entry_list, label_list, encoder
# Makes URL call and returns json formated data def json_request(url): r = http.request('GET', url) json_res = json.loads(r.data) return json_res office_type_state_url = "http://api.votesmart.org/Officials.getByOfficeTypeState?key=9125e6afe9e4b86c1dd87da7f889e7b9&o=JSON&officeTypeId=L&stateId=%s" # RETRIEVABLES vote_smart_cand_id = None vote_smart_dist_id = None print(state_dict) sql_store = SqlStorer() sql_store.set_up_connection() wild_card_null_query = {"VoteSmartID": ""} wild_card_valid_query = { "State": "", "Role": "State Representative", "InOffice": "Yes" } update_wild_card_entry = {"VoteSmartID": "", "VoteSmartDistrictID": ""} update_wild_card_condition = {"Name": "", "Id": ""} # role_translation = { "State Representative" : "State House", # "State Senator" : "State Senate" }
import json

# When True, json_request prints every URL before fetching it.
VERBOSE_DEBUG = True

# Used to clear the screen
clear = lambda: os.system("clear")

# Set up http
http = urllib3.PoolManager()

dict_fields = {}
# Opened for read/write and truncated at module load.
# NOTE(review): no matching close() is visible in this excerpt.
fail_log_fp = open('mismatches.log', 'w+')

# Set up sql
politician_sql = SqlStorer()
politician_sql.set_up_connection()


##################################################
# Makes a URL call and returns JSON-formatted data
##################################################
def json_request(url):
    """Issue a GET request for ``url`` and return the decoded JSON body.

    The URL is echoed to stdout first when ``VERBOSE_DEBUG`` is set. The
    response payload (``r.data``) is passed to ``json.loads`` without any
    status check.
    """
    if VERBOSE_DEBUG:
        print(url)
    r = http.request('GET', url)
    json_res = json.loads(r.data)
    return json_res

##################################################
# Debug / commit switches. Their consumers are outside this excerpt;
# presumably the DEBUG_* flags gate diagnostic output and the COMMIT_* flags
# gate database writes -- confirm against the code that reads them.
DEBUG_TOTAL_VOTES = False
DEBUG_UNORTHODOX_ACTIONS = False
DEBUG_DICTIONARY_FIELDS = True
COMMIT_VOTE_RECORDS = True
COMMIT_BILL_RECORDS = True

sql_query_dict = {"VoteSmartID": ""}

# HTTP Set up
http = urllib3.PoolManager()

# SQL Set up - Default Politician Database
sql_store = SqlStorer()
sql_store.set_up_connection()

# SQL Set Up - Used for voting records
voting_record_sql = SqlStorer(db_name="PoliticianInfo",
                              table_name="politician_voting_record_table")
voting_record_sql.set_up_connection()

# SQL Set Up - Used for storing bill records
bill_record_sql = SqlStorer(db_name="PoliticianInfo",
                            table_name="bill_info_table")
bill_record_sql.set_up_connection()

# Pulls the most possible bills, importantly: pulls bill action.
# The trailing %s is the candidate id placeholder.
# NOTE(review): the API key is embedded in the URL literal.
get_bills_by_official_url = "http://api.votesmart.org/Votes.getByOfficial?key=9125e6afe9e4b86c1dd87da7f889e7b9&o=JSON&candidateId=%s"
# get_categories_url= "http://api.votesmart.org/Votes.getCategories?key=9125e6afe9e4b86c1dd87da7f889e7b9&o=JSON&year=2019"
from poly_scraper import PolyScraper
from sql_storer import SqlStorer

urllib3.disable_warnings()

# Base URL of a legislator detail page.
url = "https://www.billtrack50.com/LegislatorDetail/"
http = urllib3.PoolManager()

urls = [
    "https://www.billtrack50.com/LegislatorDetail/3308",
    "https://www.billtrack50.com/LegislatorDetail/15712",
    "https://www.billtrack50.com/LegislatorDetail/15789"
]

# Two-level mapping whose innermost values default to empty lists.
legislature_dict = defaultdict(lambda: defaultdict(list))

# Connection is opened and create_table() is called at module load.
my_sql = SqlStorer("PoliticianInfo", "PoliticianTable")
my_sql.set_up_connection()
my_sql.create_table()


def get_name(soup):
    """Return the legislator name found in the ``lblLegislator`` element.

    Only the text before the first ``-`` is kept, and every single quote is
    doubled (presumably so the name can be embedded in a SQL string literal
    -- confirm against the caller).
    """
    name = soup.find(id='lblLegislator').string
    # NOTE(review): as written this replaces a space with a space; the first
    # argument may have been a different whitespace sequence originally.
    name = name.replace(" ", " ")
    name = name.split('-')[0]
    name = name.replace("'", "''")
    return name


def get_ballotpedia(soup):
    bal_link = ""
    try:
# Flask application with CORS applied to it.
app = Flask(__name__)
CORS(app)

# Database / table names used by the SqlStorer instances below.
politician_sql_db_name = "PoliticianInfo"
politician_sql_table_name = "politiciantable"
news_sql_db_name = "newsarticlesdb"
news_sql_table_name = "politician_based_newsarticlestable"

# ------------------------------------------------
# ------------------------------------------------
# ---------------- POLITICIAN DATA ---------------
# ------------------------------------------------
# ------------------------------------------------
# Connection to the politician table, opened at module load.
state_partisanships_retriever = SqlStorer(db_name=politician_sql_db_name,
                                          table_name=politician_sql_table_name)
state_partisanships_retriever.set_up_connection()


# --------------------------------
# --------------------------------
# -----------DEPRECATED-----------
# --------------------------------
# --------------------------------
@app.route('/request_senator_profile_img/', methods=['GET'])
def request_senator_profile_img():
    # Read the ``state`` and ``role`` query-string arguments and pair them
    # with the "State" / "Role" keys of the lookup dict.
    state = request.args.get('state')
    role = request.args.get('role')
    query_dict = {"State": state, "Role": role}