def collecGDPInfo(state_abbr=None, year_range=YEAR_RANGE):
    """
    Collect GDP figures using 'getGDPInfo()' and write them to 'gdp_info.csv'.

    Areas are requested with the "region.msa" type (metropolitan level). One
    CSV row is written per (area, year) pair for which 'getGDPInfo()' returns
    data; any previous content of the file is discarded.

    :param state_abbr: Collection of state keys (as used in 'states') to
        restrict the run to. None or an empty collection means every state.
    :param year_range: Iterable of years to query. Years without data are
        allowed; they simply produce no row.
    :return: None
    """

    # Variables
    gdp_info_header = ["area_id", "area_name", "area_type", "year"]
    gdp_info_types = ["per_capita_gdp", "per_capita_gdp_percent_change"]
    # Column index of every known metric name, so each result needs one lookup
    column_of = {name: idx for idx, name in enumerate(gdp_info_types)}

    place_in_us_count = 0
    gdp_info_count = 0  # How many (area, year) pairs had gdp info

    gdp_info_file_name = "gdp_info.csv"
    # Mode 'w' deletes all old data; the handle stays open for the whole run
    # instead of being re-opened for every row.
    with open(gdp_info_file_name, 'w') as f:
        f.write(",".join(gdp_info_header + gdp_info_types) + '\n')
        f.flush()

        for state in list(states.keys()):
            if state_abbr and state not in state_abbr:
                continue  # If specify states
            # GDP appears only in Metro Area level, not city level
            areas = getAreas(state, ["region.msa"])
            place_in_us_count += len(areas)

            for area in areas:
                for year in year_range:
                    print("Gathering - State: " + state + " Area: " +
                          area["name"] + " Year: " + str(year))

                    ##### GDP Info
                    gdp_infos = getGDPInfo(area["id"], year)
                    if not gdp_infos:
                        continue  # Nothing available for this area/year
                    gdp_info_count += 1

                    # The CSV is hand-rolled, so ',' inside the identifying
                    # cells (e.g. area names) is replaced to keep columns aligned
                    row = [
                        cell.replace(",", "-")
                        for cell in (str(area["id"]), area["name"],
                                     area["type"], str(year))
                    ]

                    # Metrics missing from the response stay as empty cells
                    gdp_info_row = [""] * len(gdp_info_types)
                    for gdp_info in gdp_infos:
                        idx = column_of.get(gdp_info[0])
                        if idx is not None:
                            gdp_info_row[idx] = str(gdp_info[1])

                    f.write(",".join(row + gdp_info_row) + "\n")
                    # Flush per row so collected data survives a later failure
                    f.flush()

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with gdp_info number in US: " + str(gdp_info_count))
# --- Example no. 2 ---
# 0
def main():
    """
    Print the average sentiment score of the tweets from each US state.

    Command line:
        sys.argv[1] -- file with one "term<TAB>score" pair per line
                       (score must parse as an int)
        sys.argv[2] -- file with one JSON-encoded tweet per line

    Every tweet is scored with 'sentiment_tweet(scores, tweet)'. A tweet
    counts towards a state when its 'place' is set, the place's country code
    is "US", and the text after the last comma of the place's 'full_name'
    is a key of 'states'. One line "<state> <average>" is printed per state
    that received at least one tweet.
    """
    ### build a dictionary to store the term score
    scores = {}
    with open(sys.argv[1]) as sent_file:  # closed again once parsed
        for line in sent_file:
            term, score = line.split("\t")
            scores[term] = int(score)

    ##  Load the tweet json data
    stateScore = defaultdict(list)

    with open(sys.argv[2]) as f:
        for line in f:
            # json.loads accepts no positional encoding argument on Python 3
            tweet = json.loads(line)
            s = sentiment_tweet(scores, tweet)

            place = tweet.get("place")
            if place is None or place['country_code'] != "US":
                continue

            # Take the text after the LAST comma as the state, so names with
            # no comma are skipped and names with several commas do not crash
            _, comma, state = place['full_name'].rpartition(",")
            if not comma:
                continue
            state = state.strip()
            if state not in states:
                continue

            # defaultdict creates the list on first use; always append so that
            # earlier scores for the state are kept
            stateScore[state].append(s)

    ## output
    for st, lst in stateScore.items():
        avgs = sum(lst) / len(lst)
        print(st + " " + str(avgs))
    def on_success(self, data):
        """
        Handle one received message: tag it with a candidate and a state, then emit it.

        Nothing happens when 'data' has no 'text' key, when
        'get_candidate_name(data)' returns a falsy value, or when
        'should_ignore_user()' is true for the author's id. Otherwise
        'data' is modified in place ('candidate_name' and 'state' keys are
        set) and, if the user's language is 'en', printed as JSON.

        NOTE(review): this method is written for Python 2 (print statement,
        'map()' results re-used as lists) -- confirm before running it on 3.

        :param data: dict for one message; the keys read here are 'text',
            'geo' and 'user' (with 'id', 'lang' and optionally 'location')
        :return: None
        """
        if 'text' in data:
            candidate_name = get_candidate_name(data)
            if candidate_name:
                data['candidate_name'] = candidate_name
            else:
                # No candidate found for this message: drop it
                return

            user_id = data['user']['id']

            if should_ignore_user(user_id):
                return

            ############################################
            # print_err is presumably a stderr logger -- TODO confirm
            print_err(str(data['geo']))

            ori_tweet = data['text'].encode('utf-8')
            tweet = tweet_text_clean_up(ori_tweet)
            # 'none' is the value stored when no state could be matched
            state_location = 'none'
            if is_valid_tweet(tweet) and data['user']['lang']=='en':
                print_err('-' * 50 + '\n' + tweet + '\n')
                if data['user'].get('location'):
                    # Keep only ASCII letters, digits and spaces, lower-cased
                    location = re.sub(r'[^A-Za-z\d ]','',str(data['user']['location'].encode('utf-8'))).lower()
                    print_err(location)
                    # Split on single spaces into tokens
                    location = location.split(' ')

                    location = map(lambda x: x.lower().strip(), location)
                    print_err(str(location))
                    for state in states.keys():
                        # states[state] is iterated as a collection of strings
                        # and compared token-by-token with the location
                        vals = map(lambda x:x.lower().strip(),states[state])
                        if any(map(lambda v: v in vals, location)):
                           # No break: if several states match, the last one
                           # iterated is the one that is kept
                           state_location = state
            data['state'] = state_location
            print_err(state_location)


            # Only English-language users' messages reach standard output
            if data['user']['lang'] =='en':
                    print json.dumps(data)
            else:
                print_err(ori_tweet + '\n')
def main():
    """
    Print the average sentiment score of the tweets from each US state.

    Command line:
        sys.argv[1] -- file with one "term<TAB>score" pair per line
                       (score must parse as an int)
        sys.argv[2] -- file with one JSON-encoded tweet per line

    Every tweet is scored with 'sentiment_tweet(scores, tweet)'. A tweet
    counts towards a state when its 'place' is set, the place's country code
    is "US", and the text after the last comma of the place's 'full_name'
    is a key of 'states'. One line "<state> <average>" is printed per state
    that received at least one tweet.
    """
    ### build a dictionary to store the term score
    scores = {}
    with open(sys.argv[1]) as sent_file:  # closed again once parsed
        for line in sent_file:
            term, score = line.split("\t")
            scores[term] = int(score)

    ##  Load the tweet json data
    stateScore = defaultdict(list)

    with open(sys.argv[2]) as f:
        for line in f:
            # json.loads accepts no positional encoding argument on Python 3
            tweet = json.loads(line)
            s = sentiment_tweet(scores, tweet)

            place = tweet.get("place")
            if place is None or place['country_code'] != "US":
                continue

            # Take the text after the LAST comma as the state, so names with
            # no comma are skipped and names with several commas do not crash
            _, comma, state = place['full_name'].rpartition(",")
            if not comma:
                continue
            state = state.strip()
            if state not in states:
                continue

            # defaultdict creates the list on first use; always append so that
            # earlier scores for the state are kept
            stateScore[state].append(s)

    ## output
    for st, lst in stateScore.items():
        avgs = sum(lst) / len(lst)
        print(st + " " + str(avgs))
# --- Example no. 5 ---
# 0
class ChurchesSpider(scrapy.Spider):
    """
    Spider for www.churchfinder.com.

    Starts from one '/churches/<state>' page per key of 'states'
    (lower-cased), follows the links found there, and yields one dict per
    listed church that has a title.
    """

    name = 'churches'
    allowed_domains = ['www.churchfinder.com']
    start_urls = [
        f'https://www.churchfinder.com/churches/{abbr.lower()}'
        for abbr in states.keys()
    ]

    def parse(self, response):
        """Request every page linked from '.field-content > a'; responses go to parse_city."""
        for href in response.css('.field-content > a::attr(href)').extract():
            target = response.urljoin(href)
            yield scrapy.Request(target, self.parse_city)

    def parse_city(self, response):
        """Queue the next pager page, if any, then yield one item per titled church row."""
        pager_href = response.css(
            '.pager-next > a::attr(href)').extract_first()
        if pager_href:
            # Further result pages are handled by this same callback
            yield scrapy.Request(response.urljoin(pager_href),
                                 self.parse_city)

        for row in response.css('#content .views-row'):
            title = row.css('.views-field-title a::text').extract_first()
            if not title:
                continue  # rows without a title produce no item

            link = row.css(
                '.views-field-title a::attr(href)').extract_first()
            address = row.css(
                '.field-name-field-address .field-item::text'
            ).extract_first()
            denomination = row.css(
                '.field-name-field-specific-denomination::text'
            ).extract_first()

            yield {
                'city_url': response.url,
                'url': link,
                'name': title,
                'address': address,
                'denomination': denomination,
            }
def collectGraduationRates(state_abbr=None, year_range=YEAR_RANGE):
    """
    Collect graduation rates using 'getGraduationRates()' and write them to
    'graduation_rates.csv'.

    Areas are requested with the "region.place" type. One CSV row is written
    per (area, year) pair for which 'getGraduationRates()' returns data; any
    previous content of the file is discarded.

    :param state_abbr: Collection of state keys (as used in 'states') to
        restrict the run to. None or an empty collection means every state.
    :param year_range: Iterable of years to query. Years without data are
        allowed; they simply produce no row.
    :return: None
    """

    # Variables
    graduation_rates_header = ["area_id", "area_name", "area_type", "year"]
    graduation_rate_types = [
        "percent_associates_degree", "percent_bachelors_degree_or_higher",
        "percent_graduate_or_professional_degree",
        "percent_high_school_graduate_or_higher", "percent_less_than_9th_grade"
    ]
    # Column index of every known rate name, so each result needs one lookup
    column_of = {name: idx for idx, name in enumerate(graduation_rate_types)}

    count = 0
    graduation_rates_count = 0  # How many (area, year) pairs had rates

    graduation_rates_file_name = "graduation_rates.csv"
    # Mode 'w' deletes all old data; the handle stays open for the whole run
    # instead of being re-opened for every row.
    with open(graduation_rates_file_name, 'w') as f:
        f.write(",".join(graduation_rates_header + graduation_rate_types) +
                '\n')
        f.flush()

        for state in list(states.keys()):
            if state_abbr and state not in state_abbr:
                continue  # If specify states
            areas = getAreas(state, ["region.place"])
            count += len(areas)

            for area in areas:
                for year in year_range:
                    print("Gathering - State: " + state + " Area: " +
                          area["name"] + " Year: " + str(year))

                    ##### Graduation Rates
                    graduation_rates = getGraduationRates(area["id"], year)
                    if not graduation_rates:
                        continue  # Nothing available for this area/year
                    graduation_rates_count += 1

                    # The CSV is hand-rolled, so ',' inside the identifying
                    # cells (e.g. area names) is replaced to keep columns aligned
                    row = [
                        cell.replace(",", "-")
                        for cell in (str(area["id"]), area["name"],
                                     area["type"], str(year))
                    ]

                    # Rates missing from the response stay as empty cells
                    rates = [""] * len(graduation_rate_types)
                    for graduation_rate in graduation_rates:
                        idx = column_of.get(graduation_rate[0])
                        if idx is not None:
                            rates[idx] = str(graduation_rate[1])

                    f.write(",".join(row + rates) + "\n")
                    # Flush per row so collected data survives a later failure
                    f.flush()

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(count))
    print("Place with graduation_rates number in US: " +
          str(graduation_rates_count))
def collectEarningInfo(state_abbr=None, year_range=YEAR_RANGE):
    """
    Collect earning information using 'getEarningInfo()' and write it to
    'earning_info.csv'.

    Areas are requested with the "region.place" type. One CSV row is written
    per (area, year) pair for which 'getEarningInfo()' returns data; any
    previous content of the file is discarded.

    :param state_abbr: Collection of state keys (as used in 'states') to
        restrict the run to. None or an empty collection means every state.
    :param year_range: Iterable of years to query. Years without data are
        allowed; they simply produce no row.
    :return: None
    """

    # Variables
    earning_info_header = ["area_id", "area_name", "area_type", "year"]
    earning_info_types = [
        "female_full_time_median_earnings", "female_median_earnings",
        "male_full_time_median_earnings", "male_median_earnings",
        "median_earnings", "median_earnings_bachelor_degree",
        "median_earnings_graduate_or_professional_degree",
        "median_earnings_high_school", "median_earnings_less_than_high_school",
        "median_earnings_some_college_or_associates",
        "percent_with_earnings_10000_to_14999",
        "percent_with_earnings_15000_to_24999",
        "percent_with_earnings_1_to_9999",
        "percent_with_earnings_25000_to_34999",
        "percent_with_earnings_35000_to_49999",
        "percent_with_earnings_50000_to_64999",
        "percent_with_earnings_65000_to_74999",
        "percent_with_earnings_75000_to_99999",
        "percent_with_earnings_over_100000"
    ]
    # Column index of every known metric name, so each result needs one lookup
    column_of = {name: idx for idx, name in enumerate(earning_info_types)}

    place_in_us_count = 0
    earning_info_count = 0  # How many (area, year) pairs had earning info

    earning_info_file_name = "earning_info.csv"
    # Mode 'w' deletes all old data; the handle stays open for the whole run
    # instead of being re-opened for every row.
    with open(earning_info_file_name, 'w') as f:
        f.write(",".join(earning_info_header + earning_info_types) + '\n')
        f.flush()

        for state in list(states.keys()):
            if state_abbr and state not in state_abbr:
                continue  # If specify states
            areas = getAreas(state, ["region.place"])
            place_in_us_count += len(areas)

            for area in areas:
                for year in year_range:
                    print("Gathering - State: " + state + " Area: " +
                          area["name"] + " Year: " + str(year))

                    ##### Earning Info
                    earning_infos = getEarningInfo(area["id"], year)
                    if not earning_infos:
                        continue  # Nothing available for this area/year
                    earning_info_count += 1

                    # The CSV is hand-rolled, so ',' inside the identifying
                    # cells (e.g. area names) is replaced to keep columns aligned
                    row = [
                        cell.replace(",", "-")
                        for cell in (str(area["id"]), area["name"],
                                     area["type"], str(year))
                    ]

                    # Metrics missing from the response stay as empty cells
                    earning_info_row = [""] * len(earning_info_types)
                    for earning_info in earning_infos:
                        idx = column_of.get(earning_info[0])
                        if idx is not None:
                            earning_info_row[idx] = str(earning_info[1])

                    f.write(",".join(row + earning_info_row) + "\n")
                    # Flush per row so collected data survives a later failure
                    f.flush()

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with earning_info number in US: " + str(earning_info_count))
def collectCrimeRates(state_abbr=None, year_range=YEAR_RANGE):
    """
    Collect crime rates using 'getCrimeRates()' and write them to
    'crime_rates.csv'.

    Areas are requested with the "region.place" type. One CSV row is written
    per (area, year) pair for which 'getCrimeRates()' returns data; any
    previous content of the file is discarded.

    :param state_abbr: Collection of state keys (as used in 'states') to
        restrict the run to. None or an empty collection means every state.
    :param year_range: Iterable of years to query. Years without data are
        allowed; they simply produce no row.
    :return: None
    """

    # Variables
    crime_rates_header = ["area_id", "area_name", "area_type", "year"]
    crime_rate_types = [
        "Aggravated assault", "All Crimes", "Burglary", "Larceny",
        "Motor vehicle theft", "Murder and nonnegligent manslaughter",
        "Property crime", "Rape (revised definition)", "Robbery",
        "Violent crime"
    ]
    # Column index of every known crime name, so each result needs one lookup
    column_of = {name: idx for idx, name in enumerate(crime_rate_types)}

    place_in_us_count = 0
    crime_rates_count = 0  # How many (area, year) pairs had crime rates

    crime_rates_file_name = "crime_rates.csv"
    # Mode 'w' deletes all old data; the handle stays open for the whole run
    # instead of being re-opened for every row.
    with open(crime_rates_file_name, 'w') as f:
        f.write(",".join(crime_rates_header + crime_rate_types) + '\n')
        f.flush()

        for state in list(states.keys()):
            if state_abbr and state not in state_abbr:
                continue  # If specify states
            areas = getAreas(state, ["region.place"])
            place_in_us_count += len(areas)

            for area in areas:
                for year in year_range:
                    print("Gathering - State: " + state + " Area: " +
                          area["name"] + " Year: " + str(year))

                    ##### Crime Rates
                    crime_rates = getCrimeRates(area["id"], year)
                    if not crime_rates:
                        continue  # Nothing available for this area/year
                    crime_rates_count += 1

                    # The CSV is hand-rolled, so ',' inside the identifying
                    # cells (e.g. area names) is replaced to keep columns aligned
                    row = [
                        cell.replace(",", "-")
                        for cell in (str(area["id"]), area["name"],
                                     area["type"], str(year))
                    ]

                    # Crimes missing from the response stay as empty cells
                    crime_rates_row = [""] * len(crime_rate_types)
                    for crime_rate in crime_rates:
                        idx = column_of.get(crime_rate[0])
                        if idx is not None:
                            crime_rates_row[idx] = str(crime_rate[1])

                    f.write(",".join(row + crime_rates_row) + "\n")
                    # Flush per row so collected data survives a later failure
                    f.flush()

    # General Summary
    print("\n########## General Summary ##########")
    print("Place number in US: " + str(place_in_us_count))
    print("Place with crime_rates number in US: " + str(crime_rates_count))
# --- Example no. 9 ---
# 0
	"""
	"""
	colors = sns.color_palette(n_colors=3, palette='bright')
	sns.distplot(geo_partisan_vec, ax=ax, color=colors[0], hist=False, label='Geographic Distance', rug=False, kde_kws={'bw':.2,  'gridsize':150, 'clip': clip, 'shade':False})
	sns.distplot(final_partisan_vec, ax=ax, color=colors[1], hist=False, label='Geographic and Demographic Distance', rug=False, kde_kws={'bw':.2,  'gridsize':150, 'clip': clip, 'shade':False})
	sns.distplot(original_partisan_vec, ax=ax, color=colors[2], hist=False, label='Original Districts', rug=False, kde_kws={'bw':.2,  'gridsize':150, 'clip': clip, 'shade':False})
	
	ax.tick_params(axis='both', which='major', labelsize=15)
	ax.tick_params(axis='both', which='minor', labelsize=15)
	ax.set_xlim(clip)
	ax.legend([], frameon=False)


if __name__ == "__main__":
	# Script entry point: draw one row of KDE curves per state, reading each
	# state's vectors from its pickle file under ../maps/.

	# State keys ordered by their first character only (stable sort), minus 'CO'
	state_list = sorted(states.keys(), key=itemgetter(0))
	state_list.remove('CO')

	# One subplot row per remaining state, each with its own x axis
	fig, axes = plt.subplots(
		len(state_list), 1, figsize=(7, 17), sharex=False
	)

	for state, ax in zip(state_list, axes):
		kde_path = '../maps/' + state + '_10pct/static/kde_data_' + state + '.p'
		with open(kde_path, 'rb') as f:
			# The pickle holds six vectors; only the partisan ones are plotted
			(
				geo_partisan_vec,
				final_partisan_vec,
				original_partisan_vec,
				geo_demog_vec,
				final_demog_vec,
				original_demog_vec,
			) = pickle.load(f)

		clip = (0, 1)
		make_histograms(
			geo_partisan_vec, final_partisan_vec, original_partisan_vec,
			ax, clip
		)

		# Label the row with the state key and hide the y ticks
		ax.set_ylabel(state, fontsize=15)
		ax.set_yticks([])
		ax.set_yticklabels([])
# --- Example no. 10 ---
# 0
        """
    )
    quizFile.write(
        (' ' * 20) + f'State Capitals Quiz (Form {quizNum + 1}) \n'
    )

    # Create answer file
    ansFile = open(f'quizesAns/quizAns{quizNum + 1}.rtf', 'a')
    # A Header For Every Answers File
    ansFile.write(
        (' ' * 20) +
        f'State Capitals Quiz Model Answer (Form {quizNum + 1}) \n'
    )

    # Shuffle the order of the states
    states = list(capitals.keys())
    random.shuffle(states)  # States list is shuffled evertime

    # Questions in the files
    for count, state in enumerate(states):

        # # #   creating choices
        capitalsValues = list(capitals.values())
        random.shuffle(capitalsValues)
        choices = []
        choices.append(capitals[state])
        for i in range(2):
            choices.append(capitalsValues[i])
        random.shuffle(choices)
        # # #   end