def collectGDPInfo(state_abbr=[], year_range=YEAR_RANGE):
    """
    Collect GDP using 'getGDPInfo()' and write to a csv file (metropolitan level).

    :param state_abbr: List of selected states for which data will be collected and written to file
    :param year_range: List of years for which data will be collected. Years with no available data are skipped.
    :return: None
    """
    # Variables
    gdp_info_header = ["area_id", "area_name", "area_type", "year"]
    gdp_info_types = ["per_capita_gdp", "per_capita_gdp_percent_change"]

    # Delete all old data and write the header to a new file
    gdp_info_file_name = "gdp_info.csv"
    with open(gdp_info_file_name, 'w') as f:
        f.write(",".join(gdp_info_header + gdp_info_types) + '\n')

    place_in_us_count = 0
    gdp_info_count = 0  # How many places we found GDP info for

    for state in list(states.keys()):
        if len(state_abbr) > 0 and state not in state_abbr:
            continue  # Skip states that were not requested
        # GDP appears only at the Metro Area level, not the city level
        areas = getAreas(state, ["region.msa"])
        place_in_us_count += len(areas)
        for area in areas:
            for year in year_range:
                print("Gathering - State: " + state + " Area: " + area["name"] + " Year: " + str(year))

                ##### GDP Info
                gdp_infos = getGDPInfo(area["id"], year)
                if len(gdp_infos) > 0:  # Not empty
                    gdp_info_count += 1
                    row = [str(area["id"]), area["name"], area["type"], str(year)]
                    gdp_info_row = [""] * len(gdp_info_types)
                    for gdp_info in gdp_infos:
                        gdp_info_name = gdp_info[0]
                        for idx, gdp_info_type in enumerate(gdp_info_types):
                            if gdp_info_name == gdp_info_type:
                                gdp_info_row[idx] = str(gdp_info[1])

                    # Write to file
                    with open(gdp_info_file_name, 'a') as f:
                        row = [x.replace(",", "-") for x in row]  # Replace ',' in area names so the csv stays valid
                        line = ",".join(row + gdp_info_row)
                        f.write(line + "\n")

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with gdp_info number in US: " + str(gdp_info_count))
import json
import sys
from collections import defaultdict

# 'states' and 'sentiment_tweet' are expected to be defined elsewhere in the project.


def main():
    # Build a dictionary mapping each sentiment term to its score
    scores = {}
    with open(sys.argv[1]) as sent_file:
        for line in sent_file:
            term, score = line.split("\t")
            scores[term] = int(score)

    # Load the tweet json data and accumulate sentiment scores per US state
    stateScore = defaultdict(list)
    with open(sys.argv[2]) as f:
        for line in f:
            tweet = json.loads(line)  # json.loads takes no encoding argument in Python 3
            s = sentiment_tweet(scores, tweet)
            if "place" in tweet and tweet['place'] is not None:
                if tweet['place']['country_code'] == "US":
                    fullname = tweet['place']['full_name']
                    city, state = fullname.split(",")
                    state = state.strip()
                    if state not in states:
                        continue
                    # defaultdict creates the list on first access,
                    # so a plain append suffices
                    stateScore[state].append(s)

    # Output the average sentiment per state
    for st, lst in stateScore.items():
        avg = sum(lst) / len(lst)
        print(st + " " + str(avg))
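# The script above relies on a 'sentiment_tweet' helper defined elsewhere. A
# minimal sketch of what it might look like, assuming an AFINN-style term file
# and that the tweet dict carries its text under the 'text' key (the body below
# is an assumption, not the original implementation):
def sentiment_tweet(scores, tweet):
    # Sum the score of every known term in the tweet; unknown words count as 0
    text = tweet.get('text', '')
    return sum(scores.get(word, 0) for word in text.lower().split())

# Usage (file names are illustrative):
#   python tweet_sentiment.py AFINN-111.txt tweets.json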
def on_success(self, data):
    if 'text' in data:
        candidate_name = get_candidate_name(data)
        if candidate_name:
            data['candidate_name'] = candidate_name
        else:
            return
        user_id = data['user']['id']
        if should_ignore_user(user_id):
            return
        ############################################
        print_err(str(data['geo']))
        ori_tweet = data['text']
        tweet = tweet_text_clean_up(ori_tweet)
        state_location = 'none'
        if is_valid_tweet(tweet) and data['user']['lang'] == 'en':
            print_err('-' * 50 + '\n' + tweet + '\n')
            if data['user'].get('location'):
                # Normalise the free-text location field, then try to match
                # any word in it against the known names for each state
                location = re.sub(r'[^A-Za-z\d ]', '', data['user']['location']).lower()
                print_err(location)
                # Materialise as a list: a bare map iterator would be
                # exhausted after the first state in the loop below
                location = [x.strip() for x in location.split(' ')]
                print_err(str(location))
                for state in states.keys():
                    vals = [x.lower().strip() for x in states[state]]
                    if any(v in vals for v in location):
                        state_location = state
                data['state'] = state_location
                print_err(state_location)
            print(json.dumps(data))  # lang == 'en' is already guaranteed here
        else:
            print_err(ori_tweet + '\n')
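# The streamer above assumes a 'print_err' helper that is not shown. A minimal
# sketch, assuming the convention that stdout carries the JSON tweet stream and
# stderr carries diagnostics (the body is inferred from usage, not taken from
# the original source):
import sys

def print_err(msg):
    # Log to stderr so stdout stays clean for the JSON records printed above
    sys.stderr.write(str(msg) + '\n')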
import scrapy


class ChurchesSpider(scrapy.Spider):
    name = 'churches'
    allowed_domains = ['www.churchfinder.com']
    start_urls = [
        f'https://www.churchfinder.com/churches/{state.lower()}'
        for state in states.keys()
    ]

    def parse(self, response):
        # Each state page links to its city listing pages
        links = response.css('.field-content > a::attr(href)').extract()
        for link in links:
            yield scrapy.Request(response.urljoin(link), self.parse_city)

    def parse_city(self, response):
        # Follow pagination first so every page of the city listing is crawled
        next_page = response.css('.pager-next > a::attr(href)').extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page), self.parse_city)
        churches = response.css('#content .views-row')
        for church in churches:
            name = church.css('.views-field-title a::text').extract_first()
            if name:
                yield {
                    'city_url': response.url,
                    'url': church.css('.views-field-title a::attr(href)').extract_first(),
                    'name': name,
                    'address': church.css('.field-name-field-address .field-item::text').extract_first(),
                    'denomination': church.css('.field-name-field-specific-denomination::text').extract_first(),
                }
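# A minimal way to run the spider programmatically, as an alternative to the
# 'scrapy crawl churches' CLI. The FEEDS setting requires Scrapy 2.1+, and the
# output filename is illustrative:
from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={
    'FEEDS': {'churches.json': {'format': 'json'}},  # write scraped items to JSON
})
process.crawl(ChurchesSpider)
process.start()  # blocks until the crawl finishes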
def collectGraduationRates(state_abbr=[], year_range=YEAR_RANGE):
    """
    Collect graduation rates using 'getGraduationRates()' and write to csv file.

    :param state_abbr: List of selected states for which data will be collected and written to file
    :param year_range: List of years for which data will be collected. Years with no available data are skipped.
    :return: None
    """
    # Variables
    graduation_rates_header = ["area_id", "area_name", "area_type", "year"]
    graduation_rate_types = [
        "percent_associates_degree", "percent_bachelors_degree_or_higher",
        "percent_graduate_or_professional_degree",
        "percent_high_school_graduate_or_higher", "percent_less_than_9th_grade"
    ]

    # Delete all old data and write the header to a new file
    graduation_rates_file_name = "graduation_rates.csv"
    with open(graduation_rates_file_name, 'w') as f:
        f.write(",".join(graduation_rates_header + graduation_rate_types) + '\n')

    count = 0
    graduation_rates_count = 0

    for state in list(states.keys()):
        if len(state_abbr) > 0 and state not in state_abbr:
            continue  # Skip states that were not requested
        areas = getAreas(state, ["region.place"])
        count += len(areas)
        for area in areas:
            for year in year_range:
                print("Gathering - State: " + state + " Area: " + area["name"] + " Year: " + str(year))

                ##### Graduation Rates
                graduation_rates = getGraduationRates(area["id"], year)
                if len(graduation_rates) > 0:  # Not empty
                    graduation_rates_count += 1
                    row = [str(area["id"]), area["name"], area["type"], str(year)]
                    rates = [""] * len(graduation_rate_types)
                    for graduation_rate in graduation_rates:
                        rate_name = graduation_rate[0]
                        # There are 5 graduation rate types in general
                        for idx, graduation_rate_type in enumerate(graduation_rate_types):
                            if rate_name == graduation_rate_type:
                                rates[idx] = str(graduation_rate[1])

                    # Write to file
                    with open(graduation_rates_file_name, 'a') as f:
                        row = [x.replace(",", "-") for x in row]  # Replace ',' in area names so the csv stays valid
                        line = ",".join(row + rates)
                        f.write(line + "\n")

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(count))
    print("Place with graduation_rates number in US: " + str(graduation_rates_count))
def collectEarningInfo(state_abbr=[], year_range=YEAR_RANGE):
    """
    Collect earning information using 'getEarningInfo()' and write to csv file.

    :param state_abbr: List of selected states for which data will be collected and written to file
    :param year_range: List of years for which data will be collected. Years with no available data are skipped.
    :return: None
    """
    # Variables
    earning_info_header = ["area_id", "area_name", "area_type", "year"]
    earning_info_types = [
        "female_full_time_median_earnings", "female_median_earnings",
        "male_full_time_median_earnings", "male_median_earnings",
        "median_earnings", "median_earnings_bachelor_degree",
        "median_earnings_graduate_or_professional_degree",
        "median_earnings_high_school",
        "median_earnings_less_than_high_school",
        "median_earnings_some_college_or_associates",
        "percent_with_earnings_10000_to_14999",
        "percent_with_earnings_15000_to_24999",
        "percent_with_earnings_1_to_9999",
        "percent_with_earnings_25000_to_34999",
        "percent_with_earnings_35000_to_49999",
        "percent_with_earnings_50000_to_64999",
        "percent_with_earnings_65000_to_74999",
        "percent_with_earnings_75000_to_99999",
        "percent_with_earnings_over_100000"
    ]

    # Delete all old data and write the header to a new file
    earning_info_file_name = "earning_info.csv"
    with open(earning_info_file_name, 'w') as f:
        f.write(",".join(earning_info_header + earning_info_types) + '\n')

    place_in_us_count = 0
    earning_info_count = 0  # How many places we found earning info for

    for state in list(states.keys()):
        if len(state_abbr) > 0 and state not in state_abbr:
            continue  # Skip states that were not requested
        areas = getAreas(state, ["region.place"])
        place_in_us_count += len(areas)
        for area in areas:
            for year in year_range:
                print("Gathering - State: " + state + " Area: " + area["name"] + " Year: " + str(year))

                ##### Earning Info
                earning_infos = getEarningInfo(area["id"], year)
                if len(earning_infos) > 0:  # Not empty
                    earning_info_count += 1
                    row = [str(area["id"]), area["name"], area["type"], str(year)]
                    earning_info_row = [""] * len(earning_info_types)
                    for earning_info in earning_infos:
                        earning_info_name = earning_info[0]
                        for idx, earning_info_type in enumerate(earning_info_types):
                            if earning_info_name == earning_info_type:
                                earning_info_row[idx] = str(earning_info[1])

                    # Write to file
                    with open(earning_info_file_name, 'a') as f:
                        row = [x.replace(",", "-") for x in row]  # Replace ',' in area names so the csv stays valid
                        line = ",".join(row + earning_info_row)
                        f.write(line + "\n")

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with earning_info number in US: " + str(earning_info_count))
def collectCrimeRates(state_abbr=[], year_range=YEAR_RANGE):
    """
    Collect crime rates using 'getCrimeRates()' and write to csv file.

    :param state_abbr: List of selected states for which data will be collected and written to file
    :param year_range: List of years for which data will be collected. Years with no available data are skipped.
    :return: None
    """
    # Variables
    crime_rates_header = ["area_id", "area_name", "area_type", "year"]
    crime_rate_types = [
        "Aggravated assault", "All Crimes", "Burglary", "Larceny",
        "Motor vehicle theft", "Murder and nonnegligent manslaughter",
        "Property crime", "Rape (revised definition)", "Robbery",
        "Violent crime"
    ]

    # Delete all old data and write the header to a new file
    crime_rates_file_name = "crime_rates.csv"
    with open(crime_rates_file_name, 'w') as f:
        f.write(",".join(crime_rates_header + crime_rate_types) + '\n')

    place_in_us_count = 0
    crime_rates_count = 0  # How many places we found crime rates for

    for state in list(states.keys()):
        if len(state_abbr) > 0 and state not in state_abbr:
            continue  # Skip states that were not requested
        areas = getAreas(state, ["region.place"])
        place_in_us_count += len(areas)
        for area in areas:
            for year in year_range:
                print("Gathering - State: " + state + " Area: " + area["name"] + " Year: " + str(year))

                ##### Crime Rates
                crime_rates = getCrimeRates(area["id"], year)
                if len(crime_rates) > 0:  # Not empty
                    crime_rates_count += 1
                    row = [str(area["id"]), area["name"], area["type"], str(year)]
                    crime_rates_row = [""] * len(crime_rate_types)
                    for crime_rate in crime_rates:
                        crime_name = crime_rate[0]
                        for idx, crime_rate_type in enumerate(crime_rate_types):
                            if crime_name == crime_rate_type:
                                crime_rates_row[idx] = str(crime_rate[1])

                    # Write to file
                    with open(crime_rates_file_name, 'a') as f:
                        row = [x.replace(",", "-") for x in row]  # Replace ',' in area names so the csv stays valid
                        line = ",".join(row + crime_rates_row)
                        f.write(line + "\n")

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with crime_rates number in US: " + str(crime_rates_count))
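# Since the four collectors above share the same signature, a driver might look
# like the following sketch; the state list and the __main__ guard are
# illustrative, not part of the original module:
if __name__ == "__main__":
    selected = ["CA", "NY", "TX"]                 # state abbreviations to collect
    collectGDPInfo(selected, YEAR_RANGE)          # metro-level GDP  -> gdp_info.csv
    collectGraduationRates(selected, YEAR_RANGE)  # education levels -> graduation_rates.csv
    collectEarningInfo(selected, YEAR_RANGE)      # earnings         -> earning_info.csv
    collectCrimeRates(selected, YEAR_RANGE)       # crime rates      -> crime_rates.csv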
""" """ colors = sns.color_palette(n_colors=3, palette='bright') sns.distplot(geo_partisan_vec, ax=ax, color=colors[0], hist=False, label='Geographic Distance', rug=False, kde_kws={'bw':.2, 'gridsize':150, 'clip': clip, 'shade':False}) sns.distplot(final_partisan_vec, ax=ax, color=colors[1], hist=False, label='Geographic and Demographic Distance', rug=False, kde_kws={'bw':.2, 'gridsize':150, 'clip': clip, 'shade':False}) sns.distplot(original_partisan_vec, ax=ax, color=colors[2], hist=False, label='Original Districts', rug=False, kde_kws={'bw':.2, 'gridsize':150, 'clip': clip, 'shade':False}) ax.tick_params(axis='both', which='major', labelsize=15) ax.tick_params(axis='both', which='minor', labelsize=15) ax.set_xlim(clip) ax.legend([], frameon=False) if __name__ == "__main__": state_list = list(states.keys()) state_list.sort(key=itemgetter(0)) state_list.remove('CO') fig, axes = plt.subplots(len(state_list), 1,figsize=(7, 17), sharex=False) for state, ax in zip(state_list, axes): with open('../maps/'+state+'_10pct/static/kde_data_'+state+'.p', 'rb') as f: [geo_partisan_vec, final_partisan_vec, original_partisan_vec, geo_demog_vec, final_demog_vec, original_demog_vec] = pickle.load(f) clip = (0, 1) make_histograms(geo_partisan_vec, final_partisan_vec, original_partisan_vec, ax, clip) ax.set_ylabel(state, fontsize=15) ax.set_yticks([]) ax.set_yticklabels([])
""" ) quizFile.write( (' ' * 20) + f'State Capitals Quiz (Form {quizNum + 1}) \n' ) # Create answer file ansFile = open(f'quizesAns/quizAns{quizNum + 1}.rtf', 'a') # A Header For Every Answers File ansFile.write( (' ' * 20) + f'State Capitals Quiz Model Answer (Form {quizNum + 1}) \n' ) # Shuffle the order of the states states = list(capitals.keys()) random.shuffle(states) # States list is shuffled evertime # Questions in the files for count, state in enumerate(states): # # # creating choices capitalsValues = list(capitals.values()) random.shuffle(capitalsValues) choices = [] choices.append(capitals[state]) for i in range(2): choices.append(capitalsValues[i]) random.shuffle(choices) # # # end