示例#1
0
def group_tweets_by_state(tweets):
    """Return a dictionary that groups tweets by their nearest state center.

    The keys of the returned dictionary are state names and the values are
    lists of tweets that appear closer to that state center than any other.
    When two centers are equally close, the state that comes first in
    us_states wins.

    Arguments:
    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # State centers do not depend on the tweet, so compute them only once.
    state_centers = {name: find_state_center(shape)
                     for name, shape in us_states.items()}
    state_tweet_pairs = []
    for tweet in tweets:
        location = tweet_location(tweet)
        # min() scans every state, so no particular state key has to exist
        # to seed the comparison.
        nearest_state = min(
            state_centers,
            key=lambda name: geo_distance(location, state_centers[name]))
        state_tweet_pairs.append([nearest_state, tweet])
    return group_by_key(state_tweet_pairs)
示例#2
0
def find_closest_state(tweet, state_centers):
    """Return the name of the state closest to the given tweet's location.

    Use the geo_distance function (already provided) to calculate distance
    in miles between two latitude-longitude positions.

    Only the states present in state_centers are considered. The placeholder
    name 'XX' is returned when state_centers is empty.

    Arguments:
    tweet -- a tweet abstract data type
    state_centers -- a dictionary from state names to positions.

    >>> us_centers = {n: find_center(s) for n, s in us_states.items()}
    >>> lbg = make_tweet("welcome to lewisburg!", None, 40.96, -76.89)
    >>> sf = make_tweet("welcome to san Francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new York", None, 41, -74)
    >>> find_closest_state(sf, us_centers)
    'CA'
    >>> find_closest_state(ny, us_centers)
    'NJ'
    >>> find_closest_state(lbg, us_centers)
    'PA'
    """
    location = tweet_location(tweet)
    closest_name = 'XX'
    # Infinity (rather than a fixed mileage) guarantees that the first
    # candidate always replaces the placeholder.
    closest_distance = float('inf')
    # Walk the mapping we were given instead of the global us_states, so a
    # caller may pass any subset of states without triggering a KeyError.
    for name, center in state_centers.items():
        distance = geo_distance(location, center)
        if distance < closest_distance:
            closest_distance = distance
            closest_name = name
    return closest_name
示例#3
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    The result is a defaultdict whose missing keys evaluate to None, so
    use ``in`` to test for a state without inserting an entry.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    from collections import defaultdict
    # Missing states default to None (not []), which is what callers of this
    # function have always received.
    tweets_by_state = defaultdict(lambda: None)
    us_centers = {n: find_state_center(s) for n, s in us_states.items()}
    for tweet in tweets:
        location = tweet_location(tweet)
        # A single linear scan finds the nearest state; sorting every state
        # just to take the first element is unnecessary work.
        state = min(us_states.keys(),
                    key=lambda name: geo_distance(location, us_centers[name]))
        if tweets_by_state[state] is None:
            tweets_by_state[state] = [tweet]
        else:
            tweets_by_state[state].append(tweet)
    return tweets_by_state
示例#4
0
文件: trends.py 项目: m0cahxD/cs61a
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Centers are the same for every tweet; compute them once up front.
    state_centers = {name: find_state_center(shape)
                     for name, shape in us_states.items()}
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        # The search state must start fresh for each tweet. Carrying the
        # minimum over from the previous tweet would file this tweet under
        # the previous tweet's state whenever it is farther from every center.
        nearest_state, nearest_distance = '', float('inf')
        for name, center in state_centers.items():
            distance = geo_distance(location, center)
            if distance < nearest_distance:
                nearest_state, nearest_distance = name, distance
        tweets_by_state.setdefault(nearest_state, []).append(tweet)
    return tweets_by_state
示例#5
0
文件: trends.py 项目: FZSS/trends
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    If us_states is empty, every tweet is filed under the empty-string key.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Hoisted out of the tweet loop: a state's center never changes.
    state_centers = {state: find_state_center(shape)
                     for state, shape in us_states.items()}
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        nearest_state = min(
            state_centers,
            key=lambda state: geo_distance(state_centers[state], location),
            default="",  # same fallback key as before when there are no states
        )
        tweets_by_state.setdefault(nearest_state, []).append(tweet)
    return tweets_by_state
示例#6
0
def group_tweets_by_state(tweets):
    """Return a dictionary that groups tweets by their nearest state center.

    The keys of the returned dictionary are state names and the values are
    lists of tweets that appear closer to that state center than any other.

    Arguments:
    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Compute each center once instead of once per (tweet, state) pair.
    centers = {name: find_state_center(shape)
               for name, shape in us_states.items()}
    state_tweet_pairs = []
    for tweet in tweets:
        position = tweet_location(tweet)
        # The nearest state is searched from scratch for every tweet. A
        # running minimum shared across tweets would make later tweets
        # inherit the state chosen for an earlier, closer tweet.
        nearest_state = min(
            centers,
            key=lambda name: geo_distance(centers[name], position))
        state_tweet_pairs.append([nearest_state, tweet])
    return group_by_key(state_tweet_pairs)
示例#7
0
文件: trends.py 项目: AlfiyaZi/trends
def group_tweets_by_state(tweets):
    """Bucket each tweet under the state whose center lies nearest to it.

    The result maps state names to lists of tweets; a state appears as a key
    only if at least one tweet was nearest to its center.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Map every state name to its center position.
    center_of = {}
    for state_name in us_states.keys():
        center_of[state_name] = find_state_center(us_states[state_name])

    grouped = {}
    for tweet in tweets:
        # Start from a very large distance and an empty name, then keep the
        # first state that is strictly closer than the best seen so far.
        best_distance, best_name = 999999999999, ''
        for state_name, center in center_of.items():
            gap = geo_distance(tweet_location(tweet), center)
            if gap < best_distance:
                best_distance, best_name = gap, state_name
        # Create the bucket on first use, then add the tweet to it.
        grouped.setdefault(best_name, []).append(tweet)
    return grouped
示例#8
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    If us_states is empty, every tweet is filed under the empty-string key.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    tweets_by_state = {}
    # Center position of every state, computed once.
    states_centers = {state: find_state_center(shape)
                      for state, shape in us_states.items()}
    for tweet in tweets:
        location = tweet_location(tweet)
        # None (not 0) marks "no candidate yet". A tweet sitting exactly on a
        # state center has distance 0, and that real minimum must not be
        # mistaken for an unset value and overwritten by the next state.
        closest, state_name = None, ''
        for state, center in states_centers.items():
            distance = geo_distance(location, center)
            if closest is None or distance < closest:
                closest, state_name = distance, state
        tweets_by_state.setdefault(state_name, []).append(tweet)
    return tweets_by_state
示例#9
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    Every tweet is assigned to a state, however far away it is. us_states
    must not be empty when tweets is non-empty (min() raises ValueError).

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # One center per state, computed once rather than per tweet.
    state_centers = {key: find_state_center(shape)
                     for key, shape in us_states.items()}
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        # No fixed distance cut-off: with a cap, a tweet farther than the cap
        # from every center would be stored under a non-state placeholder key.
        this_key = min(
            state_centers,
            key=lambda key: geo_distance(location, state_centers[key]))
        tweets_by_state.setdefault(this_key, []).append(tweet)
    return tweets_by_state
示例#10
0
文件: trends.py 项目: keyu-lai/cs61a
def group_tweets_by_state(tweets):
    """Group tweets under the state whose center is nearest to each of them.

    Builds a list of [state name, tweet] pairs and hands it to group_by_key,
    whose result is returned.

    Arguments:
    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # State name -> center position.
    centers = {name: find_state_center(us_states[name])
               for name in us_states.keys()}
    pairs = []
    for tweet in tweets:
        nearest_name = ''
        nearest_gap = float('inf')
        for name, center in centers.items():
            gap = geo_distance(tweet_location(tweet), center)
            if gap < nearest_gap:
                nearest_gap = gap
                nearest_name = name
        pairs.append([nearest_name, tweet])
    return group_by_key(pairs)
示例#11
0
def group_tweets_by_state(tweets):  # Problem 9
    """Collect tweets into lists keyed by the state closest to each tweet.

    The closest state is chosen by find_closest_state, using the center of
    every state in us_states. Only states that received at least one tweet
    appear as keys.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("Welcome to San Francisco", None, 38, -122)
    >>> ny = make_tweet("Welcome to New York", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"Welcome to San Francisco" @ (38, -122)'
    """
    # State name -> center position, for every known state.
    state_centers = {name: find_center(us_states[name])
                     for name in us_states.keys()}

    tweets_by_state = {}
    for tweet in tweets:
        state_name = find_closest_state(tweet, state_centers)
        # Start a new list the first time a state is seen.
        tweets_by_state.setdefault(state_name, []).append(tweet)
    return tweets_by_state
示例#12
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    A tweet is always filed under a real state name, regardless of how far
    it is from the nearest center.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Pair every state name with its center once, outside the tweet loop.
    named_centers = [(key, find_state_center(us_states[key]))
                     for key in us_states.keys()]
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        this_key = None
        minimum_distance = None
        for key, center in named_centers:
            # Measure each distance exactly once and reuse the value.
            distance = geo_distance(location, center)
            # "None" means nothing has been measured yet; there is no fixed
            # upper bound that could leave a distant tweet without a state.
            if minimum_distance is None or distance < minimum_distance:
                minimum_distance = distance
                this_key = key
        if this_key in tweets_by_state:
            tweets_by_state[this_key].append(tweet)
        else:
            tweets_by_state[this_key] = [tweet]
    return tweets_by_state
示例#13
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    If us_states is empty, every tweet is filed under the empty-string key.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # find_state_center does not depend on the tweet, so call it once per
    # state instead of once per (tweet, state) pair.
    state_centers = {state: find_state_center(us_states[state])
                     for state in us_states}
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        nearest_distance = float('inf')
        nearest_state = ""
        for state, center in state_centers.items():
            distance = geo_distance(center, location)
            if distance < nearest_distance:
                nearest_distance = distance
                nearest_state = state
        tweets_by_state.setdefault(nearest_state, []).append(tweet)
    return tweets_by_state
示例#14
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    Every key of us_states except "AA" is present in the result, with an
    empty list when no tweet was closest to that state.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> ca_tweets = group_tweets_by_state([sf, ny])['CA']
    >>> tweet_string(ca_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    us_centers = {n: find_state_center(s) for n, s in us_states.items()}
    # NOTE(review): "AA" is excluded exactly as before; presumably it is not
    # a real state -- confirm against the us_states data.
    tweets_by_state = {state: [] for state in us_states if state != "AA"}
    for tweet in tweets:
        # Resolve the closest state once per tweet rather than repeating the
        # search for every state in the table.
        closest_state = find_closest_state(tweet, us_centers)
        # Tweets whose closest state has no bucket (e.g. "AA") are dropped.
        if closest_state in tweets_by_state:
            tweets_by_state[closest_state].append(tweet)
    return tweets_by_state
示例#15
0
 def closest_state(location):
     """Return the name of the state whose centroid is nearest to location.

     The search starts from 'AK', so 'AK' is returned unless some other
     state is strictly closer. Reads the enclosing-scope names us_states
     and centroids.
     """
     best_state = 'AK'
     # Remember the best distance so it is not recomputed on every iteration.
     best_distance = geo_distance(location, centroids[best_state])
     for state in us_states.keys():
         distance = geo_distance(location, centroids[state])
         if distance < best_distance:
             best_state, best_distance = state, distance
     return best_state
示例#16
0
def closest_state(lat,lon):
    """Return the name of the state whose center is nearest to (lat, lon).

    State names come from us_states and their centers from
    list_of_state_center(us_states); the two are matched up by index.
    """
    names = list(us_states.keys())
    centers = list_of_state_center(us_states)
    # State name -> distance from its center to the requested position.
    distance_to = {
        name: geo_distance(centers[index], make_position(lat, lon))
        for index, name in enumerate(names)
    }
    return min(distance_to, key=distance_to.get)
示例#17
0
def draw_centered_map(center_state='TX', n=10):
    """Draw the n states whose centers are nearest to center_state's center.

    center_state is upper-cased before it is looked up in us_centers, and n
    is converted with int() before it is used as a count.
    """
    origin = us_centers[center_state.upper()]

    def distance_to_origin(state_name):
        return geo_distance(origin, us_centers[state_name])

    nearest_first = sorted(us_states.keys(), key=distance_to_origin)
    for state_name in nearest_first[:int(n)]:
        draw_state(us_states[state_name])
        draw_name(state_name, us_centers[state_name])
    draw_dot(origin, 1, 10)  # Mark the center state with a dot
    wait()
示例#18
0
def draw_centered_map(center_state='TX', n=10, canvas=None):
    """Draw, on canvas, the n states nearest to center_state.

    Nearness is measured between state centers. center_state is upper-cased
    before lookup and n is converted with int(). canvas is passed through
    unchanged to every drawing helper.
    """
    # State name -> center position.
    centers = {}
    for state_name, shape in us_states.items():
        centers[state_name] = find_center(shape)

    origin = centers[center_state.upper()]
    nearest_first = sorted(
        us_states.keys(),
        key=lambda state_name: geo_distance(origin, centers[state_name]))
    for state_name in nearest_first[:int(n)]:
        draw_state(us_states[state_name], canvas=canvas)
        draw_name(state_name, centers[state_name], canvas=canvas)
    draw_dot(origin, 1, 10, canvas=canvas)  # Mark the center state with a dot
    wait(canvas=canvas)
示例#19
0
def print_num_tweets_per_state(tweets):
    """Print one "<state>: <count>" line for every state in us_states.

    Counts come from group_tweets_by_state(tweets); a state that is not a
    key of that result is printed with a count of 0.
    """
    state_names = us_states.keys()
    tweets_by_state = group_tweets_by_state(tweets)
    for state in state_names:
        if state in tweets_by_state:
            count = len(tweets_by_state[state])
        else:
            count = 0
        print(state + ': ' + str(count))
示例#20
0
def draw_centered_map(center_state='TX', n=10):
    """Draw the n states whose centers are closest to that of center_state.

    center_state is upper-cased before lookup; n is converted with int().
    """
    centers = {name: find_state_center(us_states[name])
               for name in us_states.keys()}
    hub = centers[center_state.upper()]

    def gap_from_hub(name):
        return geo_distance(hub, centers[name])

    ordered_names = sorted(us_states.keys(), key=gap_from_hub)
    limit = int(n)
    for name in ordered_names[:limit]:
        draw_state(us_states[name])
        draw_name(name, centers[name])
    draw_dot(hub, 1, 10)  # Mark the center state with a dot
    wait()
示例#21
0
文件: trends.py 项目: doudoujay/cs61a
    def _get_closest_state(position):
        """Return the name of the state whose center is nearest to position.

        Returns None when us_states is empty. On a tie, the state that comes
        first in us_states is kept.
        """
        def distance_to_center(name):
            return geo_distance(position, find_state_center(us_states[name]))

        return min(us_states.keys(), key=distance_to_center, default=None)
示例#22
0
    def _get_closest_state(position):
        """Find the state in us_states whose center is closest to position.

        The state's name is returned, or None if us_states has no entries.
        """
        nearest_name = None
        nearest_gap = None
        for name, shape in us_states.items():
            gap = geo_distance(position, find_state_center(shape))
            # Skip candidates that do not beat the best distance so far.
            if nearest_gap is not None and not gap < nearest_gap:
                continue
            nearest_name, nearest_gap = name, gap
        return nearest_name
示例#23
0
文件: trends.py 项目: icring/CS-61A
def tweet_closest_state(tweet):
    """Return [state name, distance] for the state center nearest to tweet.

    A [name, distance] entry is built for every state in us_states, and the
    entry with the smallest distance is returned. On a tie, the state that
    comes first in us_states wins. us_states must not be empty (min()
    raises ValueError on an empty list).

    Arguments:
    tweet -- a tweet abstract data type
    """
    location = tweet_location(tweet)
    # Iterate the mapping directly; rebuilding list(keys()) and
    # list(values()) for every index made the loop quadratic.
    state_distances = [
        [name, geo_distance(location, find_state_center(shape))]
        for name, shape in us_states.items()
    ]
    # Only the minimum is needed, so a full sort is unnecessary.
    return min(state_distances, key=lambda entry: entry[1])
示例#24
0
文件: trends.py 项目: PMX10/projects
def draw_centered_map(center_state='TX', n=10):
    """Draw the n states nearest to center_state, measured center to center.

    For example, to draw the 20 states closest to California (including
    California):

    # python3 trends.py CA 20
    """
    centers = {}
    for state_name in us_states.keys():
        centers[state_name] = find_center(us_states[state_name])
    focus = centers[center_state.upper()]

    def gap_from_focus(state_name):
        return geo_distance(focus, centers[state_name])

    for state_name in sorted(us_states.keys(), key=gap_from_focus)[:int(n)]:
        draw_state(us_states[state_name])
        draw_name(state_name, centers[state_name])
    draw_dot(focus, 1, 10)  # Mark the center state with a dot
    wait()
示例#25
0
def draw_centered_map(center_state='TX', n=10):
    """Draw center_state together with its nearest neighbours, n states in all.

    States are ranked by the distance between their center and the center
    of center_state (looked up in upper case); the first int(n) are drawn.

    For example, to draw the 20 states closest to California (including
    California):

    # python3 trends.py CA 20
    """
    centers = {code: find_center(shape) for code, shape in us_states.items()}
    middle = centers[center_state.upper()]
    ranked_codes = sorted(
        us_states.keys(),
        key=lambda code: geo_distance(middle, centers[code]))
    chosen = ranked_codes[:int(n)]
    for code in chosen:
        draw_state(us_states[code])
        draw_name(code, centers[code])
    draw_dot(middle, 1, 10)  # Mark the center state with a dot
    wait()
示例#26
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    Every key of us_states is present; states with no tweets map to an
    empty list.

    tweets -- a sequence of tweet abstract data types

    """
    # Build the name -> center table once; rebuilding it for every tweet
    # repeats the same find_center work for no benefit.
    state_centers = {n: find_center(s) for n, s in us_states.items()}
    tweets_by_state = {key: [] for key in us_states.keys()}
    for t in tweets:
        state = find_closest_state(t, state_centers)
        tweets_by_state[state].append(t)
    return tweets_by_state
示例#27
0
def group_tweets_by_state(tweets):
    """Return a dictionary that groups tweets by their nearest state center.

    The keys of the returned dictionary are state names and the values are
    lists of tweets that appear closer to that state center than any other.
    Only states with at least one tweet appear, in us_states order.

    Arguments:
    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    state_list = list(us_states.keys())

    # Build each state's position once, rather than for every tweet.
    state_positions = {}
    for name in state_list:
        centroid = find_state_center(us_states[name])
        state_positions[name] = make_position(centroid[0], centroid[1])

    # Find the closest state once per tweet, then bucket the tweet. This
    # replaces re-scanning all tweets (twice) for every state.
    grouped = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        nearest = min(
            state_list,
            key=lambda name: geo_distance(location, state_positions[name]),
            default=None,  # no states at all: the tweet matches nothing
        )
        grouped.setdefault(nearest, []).append(tweet)

    # Emit keys in us_states order and drop states that received no tweets.
    return {name: grouped[name] for name in state_list if name in grouped}
示例#28
0
文件: trends.py 项目: jmca93/CS61A
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    On a tie, the state that comes first in us_states wins.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # Parallel lists: state_centers[i] is the center of state_names[i].
    state_names = list(us_states.keys())
    state_centers = [find_state_center(us_states[name])
                     for name in state_names]
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        distances = [geo_distance(location, center)
                     for center in state_centers]
        # index(min(...)) selects the first state at the smallest distance.
        closest_state = state_names[distances.index(min(distances))]
        # Append in place instead of copying the whole list for each tweet.
        tweets_by_state.setdefault(closest_state, []).append(tweet)
    return tweets_by_state
示例#29
0
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # State name -> center position, computed once.
    centroids = {state: find_state_center(shape)
                 for state, shape in us_states.items()}

    def closest_state(location):
        """Return the state nearest to location ('AK' unless one is closer)."""
        best_state = 'AK'
        # Keep the best distance instead of recomputing it per iteration.
        best_distance = geo_distance(location, centroids[best_state])
        for state in us_states.keys():
            distance = geo_distance(location, centroids[state])
            if distance < best_distance:
                best_state, best_distance = state, distance
        return best_state

    tweets_by_state = {}
    for tweet in tweets:
        state = closest_state(tweet_location(tweet))
        tweets_by_state.setdefault(state, []).append(tweet)
    return tweets_by_state
示例#30
0
def group_tweets_by_state(tweets):
    """Sort tweets into per-state lists according to the nearest state center.

    Returns a dictionary from state name to the list of tweets that are
    nearer to that state's center than to any other; states without tweets
    are not included.

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # State code -> center position.
    centers_by_state = {}
    for code in us_states.keys():
        centers_by_state[code] = find_state_center(us_states[code])

    grouped = {}
    for tweet in tweets:
        # Very large starting distance and empty name, replaced by the first
        # state that is strictly closer.
        best_gap, best_code = 999999999999, ''
        for code, center in centers_by_state.items():
            gap = geo_distance(tweet_location(tweet), center)
            if gap < best_gap:
                best_gap, best_code = gap, code
        # Extend an existing bucket, or open a new one for this state.
        if best_code in grouped:
            grouped[best_code].append(tweet)
        else:
            grouped[best_code] = [tweet]
    return grouped
示例#31
0
def count_tweets_by_state(tweets):
    """Return normalized per capita tweet frequencies for every state.

    The keys of the returned dictionary are the state names in us_states.
    A tweet counts towards every state for which is_in_state reports that
    its location lies inside the state. Each count is divided by the
    state's population from us_state_pop, and the results are then scaled
    by the largest per capita value, so the busiest state maps to 1. When
    no tweet falls inside any state, all values are left at zero.

    tweets -- a sequence of tweet abstract data types
    """
    counts = {state: 0 for state in us_states}
    for tweet in tweets:
        location = tweet_location(tweet)
        for state, shape in us_states.items():
            if is_in_state(location, shape):
                counts[state] += 1

    per_capita = {state: count / us_state_pop[state]
                  for state, count in counts.items()}

    # Take the maximum over all states directly; no particular state key is
    # needed to seed the search.
    peak = max(per_capita.values(), default=0)
    if peak == 0:
        # Nothing to normalize against (and dividing would fail).
        return per_capita
    return {state: value / peak for state, value in per_capita.items()}
示例#32
0
文件: trends.py 项目: jmca93/CS61A
def group_tweets_by_state(tweets):
    """Return a dictionary that aggregates tweets by their nearest state center.

    The keys of the returned dictionary are state names, and the values are
    lists of tweets that appear closer to that state center than any other.
    On a tie, the state that comes first in us_states wins. us_states must
    not be empty when tweets is non-empty (min() raises ValueError).

    tweets -- a sequence of tweet abstract data types

    >>> sf = make_tweet("welcome to san francisco", None, 38, -122)
    >>> ny = make_tweet("welcome to new york", None, 41, -74)
    >>> two_tweets_by_state = group_tweets_by_state([sf, ny])
    >>> len(two_tweets_by_state)
    2
    >>> california_tweets = two_tweets_by_state['CA']
    >>> len(california_tweets)
    1
    >>> tweet_string(california_tweets[0])
    '"welcome to san francisco" @ (38, -122)'
    """
    # State name -> center position, computed once for all tweets.
    centers = {name: find_state_center(shape)
               for name, shape in us_states.items()}
    tweets_by_state = {}
    for tweet in tweets:
        location = tweet_location(tweet)
        # min() with a key replaces the manual index counters and the
        # per-tweet distance list that had to be reset after each tweet.
        closest_state = min(
            centers,
            key=lambda name: geo_distance(location, centers[name]))
        # Append in place; concatenating a new list per tweet is quadratic.
        tweets_by_state.setdefault(closest_state, []).append(tweet)
    return tweets_by_state