def main():
    """Seed the database: word tables, per-city tweet counts, and features.

    NOTE(review): a second, identical definition of main() appears later in
    this file and shadows this one at import time — confirm which copy is
    intended and delete the other.
    """
    data.city_corpus_dict()
    # seeds words table with city, word, prob(w/city)
    data.seed_words_table()
    data.city_tweet_corpus_dict()
    # Original body mixed tabs and spaces (TabError under Python 3);
    # normalized to 4-space indentation with no behavior change.
    for city in cities:
        data.create_region_tweet_count(city)
        feature_selection.populate_db_with_features(city)
    data.create_tweet_total_count()
def main():
    """Populate the database tables the classifier reads from.

    Runs the corpus/word-table seeding steps, then records a tweet count
    and feature set for every city, and finally the overall tweet total.
    """
    data.city_corpus_dict()
    # seeds words table with city, word, prob(w/city)
    data.seed_words_table()
    data.city_tweet_corpus_dict()
    for region in cities:
        data.create_region_tweet_count(region)
        feature_selection.populate_db_with_features(region)
    data.create_tweet_total_count()
def list_features(city_name):
    """Render the feature page for the city named city_name.

    Fix: the original left feature_list / latitude / longitude /
    city_tweet_count / city_word_count unbound when no city matched,
    raising NameError at the render_template call. They now default to
    empty/None/0 so the template always receives defined values.
    (Also normalized the original's mixed tab/space indentation.)
    """
    cities = data.cities
    city = None
    feature_list = []
    latitude = None
    longitude = None
    city_tweet_count = 0
    city_word_count = 0
    for candidate in cities:
        if candidate.name == city_name:
            feature_list = feature_selection.get_hardcoded_features(candidate)
            city = candidate
    if city:
        latitude = city.lat
        longitude = city.lon
        city_name = city.name
        city_tweet_count = data.create_region_tweet_count(city)
        city_word_count = data.find_leng_city_corpus(city)
    return render_template("city_words.html", features=feature_list, city_name=city_name, latitude=latitude, longitude=longitude, city_tweet_count=city_tweet_count, city_word_count=city_word_count)
def classify_text():
    tweet = request.form['tweet']

    start = datetime.datetime.now()
    rankings = data.create_ranking(tweet)
    end = datetime.datetime.now()
    print 'getting city rankings takes: %s' % (end - start)

    start = datetime.datetime.now()
    feature_strings_dict = {}
    city_corpus_leng_dict = {}
    city_tweet_count_dict = {}
    for city in cities:
        corpus_leng = data.find_leng_city_corpus(city)
        city_corpus_leng_dict[city.name] = corpus_leng

        city_tweet_count = data.create_region_tweet_count(city)
        city_tweet_count_dict[city.name] = city_tweet_count

        feature_strings = feature_selection.included_feature_strings(
            city, tweet)
        feature_strings_dict[city.name] = feature_strings
    end = datetime.datetime.now()
    print 'getting top 5 words takes: %s' % (end - start)

    start = datetime.datetime.now()
    cty_corpus_dict = data.city_corpus_dict()
    word_count_dict = cty_corpus_dict[rankings[0][0].name]
    end = datetime.datetime.now()
    print 'getting bogus word count dict takes: %s' % (end - start)

    start = datetime.datetime.now()
    names = []
    for i in range(0, len(rankings)):
        city_name = rankings[i][0].name
        names.append(city_name)
    end = datetime.datetime.now()
    print 'generating lists takes: %s' % (end - start)
    return render_template("map.html",
                           tweet=tweet,
                           city_tweet_count_dict=city_tweet_count_dict,
                           names=names,
                           city_corpus_leng_dict=city_corpus_leng_dict,
                           feature_strings_dict=feature_strings_dict,
                           rankings=rankings)
def get_tweet_word_counts(word, city):
    """Return 2x2 contingency counts for (word, city) as floats.

    Keys in the returned dict (from the N-naming in the code; presumably
    the standard feature-selection contingency table — confirm against
    feature_selection usage):
      N11 - count for `word` within `city`'s tweet corpus
      N01 - city's tweet count minus N11
      N10 - `word`'s count across all other cities
      N00 - remaining tweets outside the city without the word

    Fixes: normalized the original's mixed tab/space indentation
    (TabError under Python 3) and removed the dead local N0_dot, which
    was computed but never used.
    """
    city_tweet_corpus_dict = data.city_tweet_corpus_dict()
    word_tweet_count_dic = city_tweet_corpus_dict[city.name]
    N11 = word_tweet_count_dic.get(word, 0)

    Ndot_1 = data.create_region_tweet_count(city)  # tweets in this city
    N01 = Ndot_1 - N11
    N = data.create_tweet_total_count()            # tweets over all cities
    Ndot_0 = N - Ndot_1
    # Total occurrences of `word` across every city's tweet corpus.
    N1_dot = 0
    for c in cities:
        N1_dot += city_tweet_corpus_dict[c.name].get(word, 0)
    N10 = N1_dot - N11
    N00 = Ndot_0 - N10

    return {
        'N10': float(N10), 'N11': float(N11), 'N01': float(N01), 'N00': float(N00)}
def list_features(city_name):
    """Render the feature page for the city named city_name.

    Fix: the original left feature_list and the city-derived template
    values unbound when no city matched city_name, so the
    render_template call raised NameError. All template values now have
    safe defaults (empty list / None / 0) for the no-match case.
    """
    cities = data.cities
    city = None
    feature_list = []
    latitude = None
    longitude = None
    city_tweet_count = 0
    city_word_count = 0
    for i in range(0, len(cities)):
        if cities[i].name == city_name:
            feature_list = feature_selection.get_hardcoded_features(cities[i])
            city = cities[i]
    if city:
        latitude = city.lat
        longitude = city.lon
        city_name = city.name
        city_tweet_count = data.create_region_tweet_count(city)
        city_word_count = data.find_leng_city_corpus(city)
    return render_template("city_words.html",
                           features=feature_list,
                           city_name=city_name,
                           latitude=latitude,
                           longitude=longitude,
                           city_tweet_count=city_tweet_count,
                           city_word_count=city_word_count)
def classify_text():
    tweet = request.form['tweet']

    start = datetime.datetime.now()
    rankings = data.create_ranking(tweet)
    end = datetime.datetime.now()
    print 'getting city rankings takes: %s' % (end - start)
    

    start = datetime.datetime.now()
    feature_strings_dict = {}
    city_corpus_leng_dict = {}
    city_tweet_count_dict = {}
    for city in cities:
	corpus_leng = data.find_leng_city_corpus(city)
	city_corpus_leng_dict[city.name] = corpus_leng
	
	city_tweet_count = data.create_region_tweet_count(city)
	city_tweet_count_dict[city.name] = city_tweet_count

	feature_strings = feature_selection.included_feature_strings(city, tweet)
	feature_strings_dict[city.name] = feature_strings
    end = datetime.datetime.now()
    print 'getting top 5 words takes: %s' % (end - start)
   
    start = datetime.datetime.now()
    cty_corpus_dict = data.city_corpus_dict()
    word_count_dict = cty_corpus_dict[rankings[0][0].name]
    end = datetime.datetime.now()
    print 'getting bogus word count dict takes: %s' % (end - start)

    start = datetime.datetime.now()
    names = []
    for i in range(0, len(rankings)):
	city_name = rankings[i][0].name
	names.append(city_name)
    end = datetime.datetime.now()
    print 'generating lists takes: %s' % (end - start)
    return render_template("map.html", tweet=tweet, city_tweet_count_dict=city_tweet_count_dict, names=names, city_corpus_leng_dict=city_corpus_leng_dict, feature_strings_dict=feature_strings_dict, rankings=rankings)
def get_tweet_word_counts(word, city):
    """Return 2x2 contingency counts for (word, city) as floats.

    Keys in the returned dict (from the N-naming in the code; presumably
    the standard feature-selection contingency table — confirm against
    feature_selection usage):
      N11 - count for `word` within `city`'s tweet corpus
      N01 - city's tweet count minus N11
      N10 - `word`'s count across all other cities
      N00 - remaining tweets outside the city without the word

    Fix: removed the dead local N0_dot (computed, never used).
    """
    city_tweet_corpus_dict = data.city_tweet_corpus_dict()
    word_tweet_count_dic = city_tweet_corpus_dict[city.name]
    N11 = word_tweet_count_dic.get(word, 0)

    Ndot_1 = data.create_region_tweet_count(city)  # tweets in this city
    N01 = Ndot_1 - N11
    N = data.create_tweet_total_count()            # tweets over all cities
    Ndot_0 = N - Ndot_1
    # Total occurrences of `word` across every city's tweet corpus.
    N1_dot = 0
    for c in cities:
        tweet_corpus_d = city_tweet_corpus_dict[c.name]
        N1_dot += tweet_corpus_d.get(word, 0)
    N10 = N1_dot - N11
    N00 = Ndot_0 - N10

    return {
        'N10': float(N10),
        'N11': float(N11),
        'N01': float(N01),
        'N00': float(N00)
    }