Exemplo n.º 1
0
def export_temporal_data(time_window, start_time, end_time):
    fetched_results = get_all_stats()
    ids = get_all_foursquare_ids()
    venue_dic = {}
    for r in ids.keys():
        venue_dic[r] = []

    cur_time = start_time
    """round time to nearest hour/day/week"""
    if time_window == 'hour':
        time_window = timedelta(hours=2)
        cur_time = cur_time - timedelta(
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(hours=1)
    elif time_window == 'day':
        time_window = timedelta(days=1)
        cur_time = cur_time - timedelta(
            hours=cur_time.hour,
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(days=1)
    elif time_window == 'week':
        time_window = timedelta(weeks=1)
        cur_time = cur_time - timedelta(
            hours=cur_time.hour,
            minutes=cur_time.minute,
            seconds=cur_time.second,
            microseconds=cur_time.microsecond) + timedelta(weeks=1)

    data_type = 'checkinsCount'
    time_list = []
    time_list.append('Plaza/Time')
    while cur_time > start_time and cur_time <= end_time:
        print cur_time
        time_list.append(cur_time.hour)
        for id in venue_dic.keys():
            sql = "select * from venue_stats where id =  '" + id + "' and time between '" + str(
                cur_time) + "' and '" + str(cur_time + time_window) + "'"
            cursor = connect_to_mysql()
            cursor.execute(sql)
            res = cursor.fetchall()
            if len(res) != 0:
                tmp = [val[data_type] for val in res]
                value = max(tmp) - min(tmp)
            else:
                value = 0
            venue_dic[id].append(value)
        cur_time += time_window
    f = open('./temporal_2hour.csv', 'wt')
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(time_list)

    for key in venue_dic.keys():
        venue_dic[key].insert(0, ids[key])
        writer.writerow(venue_dic[key])
    print data_type
    print time_list
    for id in ids.keys():
        print ids[id], venue_dic[id]
Exemplo n.º 2
0
def main():
    foursquare_ids = get_all_foursquare_ids()
    print foursquare_ids
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    docs = []
    names = []
    comments_count = []
    for id in foursquare_ids:
        sql = """select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='""" + id + "'"
        venue_name = foursquare_ids[id]
        venues[venue_name] = {}

        cursor.execute(sql)
        res = cursor.fetchall()
        doc = ""
        for r in res:
            comments = r['caption']
            #comments = json.loads(r['caption'])
            for sentence in comments:
                #doc += sentence[1]
                doc += sentence
        docs.append(doc)
        names.append(venue_name)
        comments_count.append(len(res))
    t = TFIDF()
    res = t.compute_tfidf(docs)
    for i in range(len(res)):
        print 'Plaza name : ' + names[i].encode('utf-8', 'ignore')
        print 'Comments for this Plaza : ' + str(comments_count[i])
        print res[i]
Exemplo n.º 3
0
def main():
    foursquare_ids = get_all_foursquare_ids()
    print foursquare_ids
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    docs = [] 
    names = []
    comments_count = []
    for id in foursquare_ids:
        sql = """select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='"""+id + "'"
        venue_name = foursquare_ids[id]
        venues[venue_name] = {}
        
        cursor.execute(sql)
        res = cursor.fetchall()
        doc = ""
        for r in res:
            comments = r['caption']
            #comments = json.loads(r['caption'])
            for sentence in comments:
                #doc += sentence[1]
                doc += sentence
        docs.append(doc)
        names.append( venue_name )
        comments_count.append( len(res) )
    t = TFIDF()
    res =  t.compute_tfidf(docs)
    for i in range(len(res) ):
        print 'Plaza name : '+names[i].encode('utf-8','ignore')
        print 'Comments for this Plaza : ' + str(comments_count[i])
        print res[i]
Exemplo n.º 4
0
def checkin_update():
    """Update all the checkin info for each foursquare venue in database"""
    add_table_venue_stats()
    venue_ids = get_all_foursquare_ids()
    client = foursquare.Foursquare(config.foursquare_client_id, client_secret=config.foursquare_client_secret)
    for venue_id in venue_ids:
        #try:
        print venue_id
        venue_meta = client.venues(venue_id)
        save_venue_stats(venue_meta, venue_id)
Exemplo n.º 5
0
def instagram_test():
    add_table_venue_photo_instagram()
    crawler = VenuePhotoCrawlerInstagram()
    foursquare_ids = get_all_foursquare_ids()
    #fetched_ids = get_all_photo_fetched_venue_id_instagram()
    for foursquare_id in foursquare_ids.keys():
        """NOTICE THIS IS TO AVOID REPEATING FETCHING"""
        #if foursquare_id not in fetched_ids:
        print foursquare_id, foursquare_ids[foursquare_id]
        crawler.grab_photos(foursquare_id)
Exemplo n.º 6
0
def instagram_test():
    add_table_venue_photo_instagram()
    crawler = VenuePhotoCrawlerInstagram()
    foursquare_ids = get_all_foursquare_ids()
    #fetched_ids = get_all_photo_fetched_venue_id_instagram()
    for foursquare_id in foursquare_ids.keys():
        """NOTICE THIS IS TO AVOID REPEATING FETCHING"""
        #if foursquare_id not in fetched_ids:
        print foursquare_id, foursquare_ids[foursquare_id]
        crawler.grab_photos(foursquare_id)
Exemplo n.º 7
0
def main():
    foursquare_ids = get_all_foursquare_ids()
    print foursquare_ids
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}
    for id in foursquare_ids:
        sql = (
            """select comments from venue_photo_instagram where comments is not NULL and foursquare_venue_id='"""
            + id
            + "'"
        )
        venue_name = foursquare_ids[id]
        venues[venue_name] = {}

        cursor.execute(sql)
        res = cursor.fetchall()
        # comments = []
        for r in res:
            comments = json.loads(r["comments"])
            for sentence in comments:
                words = tokenize(sentence[1])
                for word in words:
                    if word in global_word_freq:
                        global_word_freq[word] += 1
                    else:
                        global_word_freq[word] = 1

                    if word in venues[venue_name]:
                        venues[venue_name][word] += 1
                    else:
                        venues[venue_name][word] = 1
        for w in venues[venue_name].keys():
            if w in word_doc_freq:
                word_doc_freq[w] += 1
            else:
                word_doc_freq[w] = 1
        # comments.append( json.loads(r['comments']) )
        # print venues[venue_name]
    too_common_word = []
    for word in sorted(global_word_freq.items(), key=lambda tup: tup[1], reverse=True)[:300]:
        too_common_word.append(word[0])
    print too_common_word
    for venue_name in venues.keys():
        word_score = []
        words = venues[venue_name]
        for word in words:
            score = words[word] / (1 + math.log(word_doc_freq[word]))
            if word not in too_common_word:
                word_score.append((word, score, words[word]))

        print venue_name
        print sorted(word_score, key=lambda tup: tup[1], reverse=True)[0:20]
Exemplo n.º 8
0
def export_temporal_data(time_window, start_time, end_time):
    fetched_results = get_all_stats()
    ids = get_all_foursquare_ids()
    venue_dic = {}
    for r in ids.keys():
        venue_dic[r] = [] 
    
    cur_time = start_time
    """round time to nearest hour/day/week"""
    if time_window == 'hour':
        time_window = timedelta(hours=2)
        cur_time = cur_time - timedelta(minutes=cur_time.minute, seconds=cur_time.second, microseconds = cur_time.microsecond) + timedelta(hours=1)
    elif time_window == 'day':
        time_window = timedelta(days=1)
        cur_time = cur_time - timedelta(hours = cur_time.hour, minutes=cur_time.minute, seconds=cur_time.second, microseconds = cur_time.microsecond) + timedelta(days=1)
    elif time_window == 'week':
        time_window = timedelta(weeks=1)
        cur_time = cur_time - timedelta(hours = cur_time.hour, minutes=cur_time.minute, seconds=cur_time.second, microseconds = cur_time.microsecond)+timedelta(weeks=1)
     
    data_type = 'checkinsCount'
    time_list = []
    time_list.append('Plaza/Time')
    while cur_time > start_time and cur_time <= end_time:
        print cur_time
        time_list.append(cur_time.hour)
        for id in venue_dic.keys():
            sql = "select * from venue_stats where id =  '" + id + "' and time between '" + str(cur_time) + "' and '" + str(cur_time+time_window) +"'"
            cursor = connect_to_mysql()
            cursor.execute(sql)
            res = cursor.fetchall()
            if len(res) != 0:
                tmp = [val[data_type] for val in res ]
                value = max(tmp) - min(tmp) 
            else:
                value = 0
            venue_dic[id].append( value )
        cur_time+= time_window
    f = open('./temporal_2hour.csv', 'wt')
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow( time_list)
    
    for key in venue_dic.keys():
        venue_dic[key].insert(0, ids[key])
        writer.writerow( venue_dic[key] )
    print data_type
    print time_list
    for id in ids.keys():
        print ids[id], venue_dic[id]
Exemplo n.º 9
0
def main():
    foursquare_ids = get_all_foursquare_ids()
    print foursquare_ids
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}
    for id in foursquare_ids:
        sql = """select comments from plazas_instaphoto where comments is not NULL and foursquare_venue_id='""" + id + "'"
        venue_name = foursquare_ids[id]
        venues[venue_name] = {}

        cursor.execute(sql)
        res = cursor.fetchall()
        for r in res:
            comments = json.loads(r['comments'])
            for sentence in comments:
                words = tokenize(sentence[1])
                for word in words:
                    if word in global_word_freq:
                        global_word_freq[word] += 1
                    else:
                        global_word_freq[word] = 1

                    if word in venues[venue_name]:
                        venues[venue_name][word] += 1
                    else:
                        venues[venue_name][word] = 1
        for w in venues[venue_name].keys():
            if w in word_doc_freq:
                word_doc_freq[w] += 1
            else:
                word_doc_freq[w] = 1
    too_common_word = set()
    for word in sorted(global_word_freq.items(),
                       key=lambda tup: tup[1],
                       reverse=True)[:300]:
        too_common_word.add(word[0])
    for venue_name in venues.keys():
        word_score = []
        words = venues[venue_name]
        for word in words:
            score = words[word] / (1 + math.log(word_doc_freq[word]))
            if word not in too_common_word:
                word_score.append((word, score, words[word]))

        print venue_name
        print sorted(word_score, key=lambda tup: tup[1], reverse=True)[0:20]
def main():
    foursquare_ids = get_all_foursquare_ids()
    print foursquare_ids
    venues = {}
    cursor = connect_to_mysql()
    word_doc_freq = {}
    global_word_freq = {}
    for id in foursquare_ids:
        sql = """select caption from plazas_instaphoto where caption is not NULL and foursquare_venue_id='"""+id + "'"
        venue_name = foursquare_ids[id]
        venues[venue_name] = {}

        cursor.execute(sql)
        res = cursor.fetchall()
        for r in res:
            comments = [r['caption']]
            for sentence in comments:
                words = tokenize(sentence)
                for word in words:
                    if word in global_word_freq:
                        global_word_freq[word]+=1
                    else:
                        global_word_freq[word] = 1

                    if word in venues[venue_name]:
                        venues[venue_name][word] += 1
                    else:
                        venues[venue_name][word] = 1
        for w in venues[venue_name].keys():
            if w in word_doc_freq:
                word_doc_freq[w]+=1
            else:
                word_doc_freq[w] = 1
    too_common_word = set()
    for word in sorted(global_word_freq.items(), key = lambda tup: tup[1], reverse=True)[:300]:
        too_common_word.add(word[0])
    for venue_name in venues.keys():
        word_score = []
        words = venues[venue_name]
        for word in words:
            score = words[word]/(1+math.log(word_doc_freq[word]))
            if word not in too_common_word:
                word_score.append( (word, score, words[word]) )
        
        print venue_name.encode('utf-8','ignore')
        print sorted(word_score, key=lambda tup: tup[1], reverse=True)[0:50]