def get_db_connection():
    """Ensure the module-level ``connection``/``cursor`` globals point at a
    live connection to the "foursquare" database, reconnecting if needed.

    Side effects: rebinds the globals ``connection`` and ``cursor``.
    """
    global cursor, connection
    if cursor is None or connection is None:
        connection, cursor = utils.connect_to_db(
            "foursquare", cursor_type=psycopg2.extras.DictCursor)
    # Test the liveliness of the connection.
    try:
        cursor.execute("SELECT 1")
    except (psycopg2.OperationalError, psycopg2.InterfaceError):
        # BUG FIX: a cursor/connection that has been *closed* raises
        # InterfaceError, not OperationalError; the original probe crashed
        # in that case instead of reconnecting.
        # Reconnect to the database.
        connection, cursor = utils.connect_to_db(
            "foursquare", cursor_type=psycopg2.extras.DictCursor)
def get_venues_reviewed():
    """Return the distinct place ids of every venue recorded in the study DB."""
    _, cur = utils.connect_to_db(
        "study", cursor_type=psycopg2.extras.DictCursor)
    cur.execute(sql.SQL("SELECT DISTINCT place_id FROM places;"))
    ids = []
    for row in cur:
        ids.append(row['place_id'])
    return ids
def get_and_save_interests_to_db(place_id, topni=10):
    """Compute the top-``topni`` interest lists for *place_id* and persist
    each entry via ``foursquare.save_place_personal_information_to_db``.

    Interests are positional tuples; the indices used below mirror the
    producer's layout (0: pi_id, 2: score, 3: feature_type, 4: phrase
    modeler, 5: avg, 6: model type, 7: raw tags) — confirm against
    get_all_interests if that layout changes.
    """
    interests = get_all_interests(place_id, topni=topni)
    print(" [.] Got %s interests" % len(interests))
    if not interests:
        return
    connection, cursor = utils.connect_to_db("foursquare")
    for idx, interest_list in enumerate(interests):
        if interest_list is None:
            print("No interests for index %s" % idx)
            continue
        for rank, entry in enumerate(interest_list):
            tags = process_tags(entry[3], entry[7])
            print(" [.] Tags: %s" % tags)
            record = {
                'place_id': place_id,
                'pi_id': entry[0],
                'score': float(entry[2]),
                'feature_type': entry[3],
                'phrase_modeler': entry[4],
                'avg': entry[5],
                'model_type': entry[6],
                'tags': json.dumps(tags, cls=JSONEncoder),
                'rank': int(rank)
            }
            foursquare.save_place_personal_information_to_db(record, cursor=cursor)
    connection.commit()
    print("Done saving interests")
def generate_csv():
    """Exports a .csv file that corresponds to the columns in the DB. """
    connection, cur = utils.connect_to_db(utils.Config.DB_NAME)
    filename = input(Multiline.export_database) + '.csv'
    rows = []
    try:
        rows = cur.execute(utils.SQLQuery.query_all_rows).fetchall()
    except sqlite3.DatabaseError:
        print(utils.GenData.db_error_msg)
    else:
        connection.commit()
    finally:
        # Always release the DB handles, whether the query worked or not.
        cur.close()
        connection.close()
    if rows:
        with open(filename, 'w', newline='') as handle:
            columns = utils.GenData.columns
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            for row in rows:
                writer.writerow({
                    'ID': row[0],
                    'Title': row[1],
                    'Star': row[2],
                    'Costar': row[3],
                    'Year': row[4],
                    'Genre': row[5]
                })
        print(f"{filename} successfully written.")
        input(utils.GenData.enter_continue)
def get_places(location, distance=200, limit=5):
    """Return up to *limit* DBpedia places within *distance* meters of
    ``location`` (a dict with 'lon'/'lat'), ordered by distance.

    Each result dict has keys: name, color, id, distance, location.
    """
    connection, cursor = utils.connect_to_db(
        "dbpedia", cursor_type=psycopg2.extras.DictCursor)
    query_string = """
    WITH place AS (
        SELECT ST_TRANSFORM(ST_SETSRID(ST_MAKEPOINT(%s, %s),4326),3857) as coords
    )
    SELECT name, uri, longitude, latitude,
           ST_Distance(p.coords, ST_TRANSFORM(geom, 3857)) AS distance
    FROM places, place p
    WHERE latitude <> -90 AND longitude <> 0
      AND ST_DWithin(ST_TRANSFORM(geom, 3857), p.coords, %s)
    ORDER BY distance ASC LIMIT %s;"""
    data = (location['lon'], location['lat'], distance, limit)
    cursor.execute(query_string, data)
    records = cursor.fetchall()
    places = []
    for rec in records:
        # BUG FIX: the query exposes columns 'name' and 'uri'; the original
        # read rec['place_name'] / rec['place_uri'], which raised KeyError
        # for every row returned.
        loc = {"name": rec['name'],
               "color": "#261b7c",
               "id": rec['uri'],
               "distance": rec["distance"],
               "location": {
                   "lon": rec['longitude'],
                   "lat": rec['latitude']
               }}
        places.append(loc)
    return places
def get_foursquare_venue_polygon(venue_id):
    """Fetch one venue, look up its polygon via Nominatim and write the
    polygon back into the ``poly`` column. Returns None in all cases."""
    connection, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(
        """ SELECT venue_id, name, longitude, latitude
    FROM venues
    WHERE venue_id = %s;""", (venue_id, ))
    row = cursor.fetchone()
    if not row:
        return
    venue = dict(row)
    venue_name = venue['name']
    location = {"lon": venue['longitude'], "lat": venue['latitude']}
    # get the polygon from nominatim
    print("get polygon for place {} with location {}, {}".format(
        venue_name, venue['longitude'], venue['latitude']))
    res = get_place_with_name(location, venue_name)
    if not res:
        return
    print("Found polygon")
    cursor.execute(
        """ UPDATE venues SET poly = ST_GeomFromText(%s, 4326)
    WHERE venue_id = %s;""", (res['geotext'], venue_id))
    connection.commit()
def delete_record():
    """Deleting functionality for the program.

    Prompts for a title, shows the matching record and, after explicit
    confirmation, deletes it. Always closes the DB handles before leaving.
    """
    conn, c = utils.connect_to_db(utils.Config.DB_NAME)
    # Trailing comma: the title is deliberately a 1-tuple of SQL parameters.
    title = input(Multiline.delete_record),
    movie = c.execute(utils.SQLQuery.query_by_title, title).fetchone()
    print(Multiline.record_to_delete)
    if not movie:
        # BUG FIX: the original leaked the connection/cursor on this path.
        c.close()
        conn.close()
        print("Record not found.")
        input(utils.GenData.enter_continue)
        return
    utils.print_record(movie)
    print(utils.GenData.line_sep)
    conf_deletion = input(Multiline.delete_confirmation)
    if conf_deletion not in ('Y', 'y'):
        # BUG FIX: connection also leaked when the user declined.
        c.close()
        conn.close()
        print("Item NOT deleted.")
        input(utils.GenData.enter_continue)
        return
    try:
        c.execute(utils.SQLQuery.delete_record, title)
    except sqlite3.DatabaseError:
        print(utils.GenData.db_error_msg)
        input(utils.GenData.enter_continue)
    else:
        # BUG FIX: "Item deleted." used to be printed even after a
        # DatabaseError; it now runs only on the success path.
        conn.commit()
        print("Item deleted.")
        input(utils.GenData.enter_continue)
    finally:
        c.close()
        conn.close()
def user_stats(connection=None, cursor=None):
    """Return per-session statistics (place count, IP, start/end times).

    Timestamps are rendered as "%Y-%m-%d %H:%M:%S" or 'N/A' when missing.
    """
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    query_string = sql.SQL("""
    SELECT COUNT(p.place_id) as nb_places, s.ip_address, s.session_start, s.session_end
    FROM places p JOIN sessions s ON s.session_id = p.session_id
    GROUP BY s.session_id, s.ip_address, s.session_start, s.session_end;""")
    cursor.execute(query_string)

    def _fmt(ts):
        # Missing timestamps come back as NULL -> render 'N/A'.
        if ts is None:
            return 'N/A'
        return utils.datetime_from_timestamp(ts).strftime("%Y-%m-%d %H:%M:%S")

    return [{
        'nb_places': rec['nb_places'],
        'ip_address': rec['ip_address'],
        'start': _fmt(rec['session_start']),
        'end': _fmt(rec['session_end'])
    } for rec in cursor]
def update_all_foursquare_venues_with_polygon():
    """Walk every venue in the DB, ask Nominatim for a matching polygon
    and store it in the ``poly`` column (committing per venue)."""
    connection, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(
        """ SELECT venue_id, name, longitude, latitude FROM venues;""")
    all_venues = [dict(row) for row in cursor.fetchall()]
    update_sql = """ UPDATE venues SET poly = ST_GeomFromText(%s, 4326)
    WHERE venue_id = %s;"""
    for venue in all_venues:
        coords = {"lon": venue['longitude'], "lat": venue['latitude']}
        res = get_place_with_name(coords, venue['name'])
        if not res or 'geotext' not in res:
            continue
        cursor.execute(update_sql, (res['geotext'], venue['venue_id']))
        connection.commit()
def get_foursquare_redirected_venue(venue_id, connection=None, cursor=None):
    """Return ``(redirect_venue_id, name, emoji)`` for *venue_id*, retrying
    (up to the global ``nb_attempts`` budget) on lookup failure.

    Returns None once the retry budget is exhausted. Side effect: resets
    the global ``nb_attempts`` to 10 on success.
    """
    global nb_attempts
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "foursquare", cursor_type=psycopg2.extras.DictCursor)
    query = sql.SQL("""SELECT name, emoji, redirect_venue_id
    FROM venues
    WHERE venue_id = %s;""")
    cursor.execute(query, (venue_id,))
    try:
        res = cursor.fetchone()
        redirected_venue_id = res['redirect_venue_id']
        name = res['name']
        emoji = res['emoji']
        nb_attempts = 10
        return redirected_venue_id, name, emoji
    except (TypeError, KeyError, psycopg2.Error):
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. fetchone() returns None for an
        # unknown venue (TypeError on subscripting); psycopg2.Error covers
        # transient DB failures worth retrying.
        print("Retry")
        time.sleep(1)
        if nb_attempts >= 0:
            nb_attempts -= 1
            return get_foursquare_redirected_venue(
                venue_id, connection=connection, cursor=cursor)
        else:
            print("give up - venue %s" % venue_id)
            return None
def model_init():
    """Populate the module-level model state: DB handles, personal
    information, classifiers and place ids. Rebinds all listed globals."""
    global c_study, cur_study, pis, pis_ids, nb_pis, place_ids, nb_places, clfs, nb_clfs

    # 0-0. Init.
    t0 = time.time()
    c_study, cur_study = utils.connect_to_db(
        "study", cursor_type=psycopg2.extras.DictCursor)
    print("[.] Init - %.2f" % (time.time() - t0))

    # 0-1. Get the personal information.
    t0 = time.time()
    pis = foursquare.load_personal_information()
    pis_ids = [entry['pi_id'] for entry in pis.values()]
    nb_pis = len(pis_ids)
    print("[.] Got %s personal information ids (%.2f)" %
          (nb_pis, time.time() - t0))

    # 0-2. Get the classifiers
    t0 = time.time()
    clfs = [Classifier(param, pis_ids) for param in get_all_classifiers()]
    nb_clfs = len(clfs)
    print("[.] Got %s classifiers (%.2f)" % (nb_clfs, time.time() - t0))

    # 0-3. Get the places
    t0 = time.time()
    place_ids = get_all_distinct_place_ids(connection=c_study, cursor=cur_study)
    nb_places = len(place_ids)
    print("[.] Got %s place ids (%.2f)" % (nb_places, time.time() - t0))
def redirect_all_foursquare_venues():
    """Rewrite the filtered check-ins file, replacing each venue id with its
    redirect target (when one exists) and appending name/emoji columns.

    Side effects: writes FSQ_CHECKINS_FILTERED_POIS_REDIRECTED and fills the
    module-level ``redirected_venues`` map.
    """
    connection, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    with open(FSQ_CHECKINS_FILTERED_POIS_REDIRECTED, 'w') as f_out:
        with open(FQS_CHECKINS_FILTERED_POIS, 'r') as f:
            count = 0
            count_redirected = 0
            for line in tqdm.tqdm(f, total=get_num_lines(FQS_CHECKINS_FILTERED_POIS)):
                fields = line.strip().split('\t')
                venue_id = fields[0]
                if venue_id in REMOVE_VENUES:
                    continue
                # BUG FIX: the helper returns None (not a 3-tuple) once its
                # retry budget is exhausted; unpacking that crashed the
                # whole export. Skip such venues instead.
                res = get_foursquare_redirected_venue(
                    venue_id, connection=connection, cursor=cursor)
                if res is None:
                    continue
                redirected_venue_id, name, emoji = res
                fields.append(name)
                fields.append(emoji)
                if redirected_venue_id is not None:
                    redirected_venues[venue_id] = redirected_venue_id
                    fields[0] = redirected_venue_id
                    count_redirected += 1
                try:
                    f_out.write("\t".join(fields) + "\n")
                except TypeError:
                    # BUG FIX: was a bare `except:`. join() raises TypeError
                    # when a field (e.g. a NULL name/emoji) is None; log the
                    # offending venue and keep going, as before.
                    print(venue_id)
                    # sys.exit(0)
                count += 1
    print("[.] Redirected %s venues out of %s from the Foursquare checkins dataset." % (count_redirected, count))
def get_relevance_ratings_admin(place_id, connection=None, cursor=None):
    """Return the admin-session relevance ratings for *place_id*, keyed by
    pi_id, each enriched with metadata from the global ``pis`` map."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    query = sql.SQL("""SELECT pi_id, array_agg(rating) as ratings
    FROM place_personal_information_relevance
    WHERE place_id = %s AND session_id = 'admin'
    GROUP BY pi_id;""")
    cursor.execute(query, (place_id, ))
    result = {}
    for record in cursor:
        pi_id = record['pi_id']
        ratings = record['ratings']
        meta = pis[pi_id]
        result[pi_id] = {
            "pi_id": pi_id,
            "ratings": ratings,
            "name": meta['name'],
            "avg_rating": sum(ratings) / len(ratings),
            "category_id": meta['category_id'],
            "subcategory_name": meta['subcategory_name'],
            "category_icon": meta['category_icon'],
            "tags": meta['tags']
        }
    return result
def get_personal_information_list_from_foursquare_category(category_id):
    """Return the pi_ids array stored for *category_id*, or [] if the
    category is unknown or has no pi_ids."""
    _, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(
        """SELECT pi_ids FROM categories WHERE category_id = %s""",
        (category_id, ))
    row = cursor.fetchone()
    if row and row[0]:
        return row[0]
    return []
def save_place(session_id, place):
    """Upsert one selected place for a study session.

    *place* is a dict with keys: id, icon-name, name, lon, lat, address,
    city, fulladdress, category.
    """
    connection, cursor = utils.connect_to_db("study")
    query_string = """INSERT INTO places
    (place_id, session_id, place_name, longitude, latitude, address, city, category, fulladdress, icon)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (session_id, place_id) DO UPDATE SET place_name=EXCLUDED.place_name,
    longitude=EXCLUDED.longitude, latitude=EXCLUDED.latitude, address=EXCLUDED.address,
    city=EXCLUDED.city, category=EXCLUDED.category, fulladdress=EXCLUDED.fulladdress,
    icon=EXCLUDED.icon;"""
    data = (place['id'], session_id, place['name'], place['lon'],
            place['lat'], place['address'], place['city'],
            place['category'], place['fulladdress'], place['icon-name'])
    cursor.execute(query_string, data)
    connection.commit()
def save_nodes_to_db(geo_mapped_instances, instances):
    """Persist DBpedia geo-mapped nodes into the ``places`` table.

    Nodes lacking a longitude or latitude predicate are reported and
    skipped. Commits once at the end; progress is printed every 100 rows.
    """
    connection, cursor = utils.connect_to_db("dbpedia")
    date_added = 2016
    # Loop-invariant query hoisted out of the loop.
    insert_sql = """INSERT INTO places
    (date_added, uri, name, type_uri, longitude, latitude, geom)
    VALUES (%s, %s, %s, %s, %s, %s, ST_GeomFromText(%s, 4326))
    ON CONFLICT DO NOTHING;"""
    total = len(geo_mapped_instances)
    count = 0
    for node, predicates in geo_mapped_instances.items():
        if lon_predicate not in predicates or lat_predicate not in predicates:
            print('Could not insert {} into database'.format(node))
            continue
        lon = float(predicates[lon_predicate][0])
        lat = float(predicates[lat_predicate][0])
        place_type = ""
        if node in instances and type_predicate in instances[node]:
            place_type = instances[node][type_predicate][0]
        data = (date_added, node, get_name_from_uri(node), place_type,
                lon, lat, "POINT({} {})".format(lon, lat))
        cursor.execute(insert_sql, data)
        count += 1
        if count % 100 == 0:
            print_progress("Done %.2f" % (100.0 * count / total))
    connection.commit()
    print()
def add_trackingadvisor_id(session_id, user_id):
    """Attach a TrackingAdvisor user id to an existing study session."""
    connection, cursor = utils.connect_to_db("study")
    cursor.execute(
        """UPDATE sessions SET user_id = %s WHERE session_id = %s;""",
        (user_id, session_id))
    connection.commit()
def end_session(session_id):
    """Stamp the session's end time with the current timestamp."""
    connection, cursor = utils.connect_to_db("study")
    cursor.execute(
        """UPDATE sessions SET session_end = %s WHERE session_id = %s;""",
        (utils.current_timestamp(), session_id))
    connection.commit()
def get_all_places(connection=None, cursor=None):
    """Return every row of the study ``places`` table as plain dicts."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("""SELECT * FROM places;"""))
    rows = []
    for record in cursor:
        rows.append(dict(record))
    return rows
def get_all_distinct_place_ids(connection=None, cursor=None):
    """Return the distinct place ids present in the study ``places`` table."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("SELECT DISTINCT place_id FROM places;"))
    ids = []
    for row in cursor:
        ids.append(row['place_id'])
    return ids
def get_nb_distinct_places(connection=None, cursor=None):
    """Return how many distinct place ids exist in the ``places`` table."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("SELECT COUNT(DISTINCT place_id) FROM places;"))
    row = cursor.fetchone()
    return row[0]
def get_personal_information_from_foursquare_category(category_id):
    """Join a Foursquare category's pi_ids against the personal_information
    table and return the matching rows as dicts."""
    _, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    query_string = """ SELECT c.category_id, c.name, pi.pi_id, pi.name as pi_name,
    pi.category_id as picid, pi.category_icon, pi.subcategory_icon, pi.subcategory_name
    FROM categories c, unnest(pi_ids) piid
    JOIN personal_information pi ON piid::text = pi.pi_id::text
    WHERE c.category_id = %s;"""
    cursor.execute(query_string, (category_id, ))
    rows = []
    for record in cursor:
        rows.append(dict(record))
    return rows
def get_all_personal_information():
    """Return all personal-information rows (name, category_id, pi_id, tags),
    ordered by category then name."""
    _, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(""" SELECT name, category_id, pi_id, tags
    FROM personal_information
    ORDER BY category_id ASC, name ASC;""")
    rows = []
    for record in cursor:
        rows.append(dict(record))
    return rows
def get_model_stats():
    """Cross-tabulate each model's ranked personal information against the
    study relevance ratings.

    Returns a dict keyed by (model_type, feature_type, avg, phrase_modeler);
    each value is a 10x5 matrix: matrix[rank][rating-1] counts how often a
    pi at that rank received that rating (ratings are assumed 1..5, ranks
    0..9 — the producer uses topni=10).
    """
    t0 = time.time()
    c_study, cur_study = utils.connect_to_db(
        "study", cursor_type=psycopg2.extras.DictCursor)
    c_fsq, cur_fsq = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    print("init - %s" % (time.time() - t0))

    models = {}
    place_ids = get_all_distinct_place_ids(c_study, cur_study)
    print("got %s place ids" % len(place_ids))
    for place_id in place_ids:
        # 1 - get the personal information relevance information
        ratings = get_relevance_ratings(place_id, connection=c_study,
                                        cursor=cur_study)
        # 2 - get the personal information computed by the models for this place
        t0 = time.time()
        place_pis = foursquare.get_place_personal_information_from_db(
            place_id, connection=c_fsq, cursor=cur_fsq)
        # 3 - aggregate per model; a model is identified by the tuple
        #     (model_type, feature_type, avg, phrase_modeler)
        for pi in place_pis:
            key = (pi['model_type'], pi['feature_type'], pi['avg'],
                   pi['phrase_modeler'])
            matrix = models.setdefault(key, [[0] * 5 for _ in range(10)])
            pi_id = pi['pi_id']
            given = ratings[pi_id]['ratings'] if pi_id in ratings else []
            for r in given:
                matrix[pi['rank']][r - 1] += 1
        print("end - %s" % (time.time() - t0))
    return models
def get_foursquare_category_topics(category_id):
    """Return the topics stored for a Foursquare category, ordered by rank."""
    _, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    query_string = """ SELECT topic_id, topic_rank, top_documents, top_words
    FROM place_category_topics
    WHERE category_id = %s
    ORDER BY topic_rank ASC;"""
    cursor.execute(query_string, (category_id, ))
    topics = []
    for record in cursor:
        topics.append(dict(record))
    return topics
def get_all_classifiers():
    """ Returns all possible classifiers in a list. """
    _, cursor = utils.connect_to_db(
        "foursquare", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("""SELECT model_type, feature_type, avg, phrase_modeler
    FROM place_personal_information
    GROUP BY model_type, feature_type, avg, phrase_modeler;"""))
    classifiers = []
    for row in cursor:
        classifiers.append(dict(row))
    return classifiers
def get_nb_distinct_finished_sessions(connection=None, cursor=None):
    """Count the sessions that have both a start and an end timestamp."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL(
        "SELECT COUNT(DISTINCT session_id) FROM sessions WHERE session_start IS NOT NULL AND session_end IS NOT NULL;"
    ))
    row = cursor.fetchone()
    return row[0]
def get_avg_nb_places_per_user(connection=None, cursor=None):
    """Return the average number of places recorded per session."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("""SELECT AVG(q.c)
    FROM (SELECT COUNT(place_id) as c FROM places GROUP BY session_id) as q;"""))
    row = cursor.fetchone()
    return row[0]
def get_avg_session_duration(connection=None, cursor=None):
    """Return the average session duration in minutes, over completed
    sessions shorter than one hour (longer ones are treated as outliers)."""
    if connection is None or cursor is None:
        connection, cursor = utils.connect_to_db(
            "study", cursor_type=psycopg2.extras.DictCursor)
    cursor.execute(sql.SQL("""SELECT AVG(session_end-session_start) / 60 as avg
    FROM sessions
    WHERE session_start IS NOT NULL AND session_end IS NOT NULL AND session_end-session_start < 3600;"""))
    row = cursor.fetchone()
    return row[0]
def save_personal_information_privacy(session_id, pi_id, rating):
    """Upsert one privacy rating for (session_id, pi_id)."""
    connection, cursor = utils.connect_to_db("study")
    query_string = """INSERT INTO place_personal_information_privacy (pi_id, session_id, rating)
    VALUES (%s, %s, %s)
    ON CONFLICT (session_id, pi_id) DO UPDATE SET rating=EXCLUDED.rating;"""
    cursor.execute(query_string, (pi_id, session_id, rating))
    connection.commit()
def setUp(self):
    """Stub out the tunnel probe and start every test from an empty DB."""
    def _stub_active_tunnels():
        # fake active tunnels for tests
        return [12345]

    # fake get_active_tunnels call
    libraries.get_active_tunnels = _stub_active_tunnels
    # clear db
    self.db = utils.connect_to_db(settings.ENV)
    utils.remove_all_data(self.db)
def db_connect():
    """Connect CherryPy to the database, retrying up to 5 times with a
    5-second delay, and initialize the libraries layer on success.

    Side effect: sets ``cherrypy.db``.
    """
    time_delay = 5
    max_attempts = 5
    for _attempt in range(max_attempts):
        try:
            cherrypy.db = utils.connect_to_db(settings.ENV)
            libraries.init(cherrypy.db)
            LOG.info('connected to db.')
            return
        except Exception:
            LOG.error('db connection error. will try again.', exc_info=True)
            time.sleep(time_delay)
    # BUG FIX: the original compared the counter to max_attempts *after*
    # incrementing it past that value, so this give-up message was
    # unreachable. Reaching this point now always means every attempt failed.
    LOG.error('db connection error. did not connect...')
def tearDown(self):
    """Wipe every record so the next test starts from a clean DB."""
    # clear db
    db = utils.connect_to_db(settings.ENV)
    self.db = db
    utils.remove_all_data(db)
def thread_connect(thread_index):
    """Create a DB connection and store it on the current thread.

    See http://tools.cherrypy.org/wiki/Databases
    """
    db = utils.connect_to_db(settings.ENV)
    cherrypy.thread_data.db = db