def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global genres_global global actors_global global directors_global genres_global = dict() actors_global = dict() directors_global = dict() movies_list = [] for i in parse_movie_file(os.path.join(data_path, "Data1000Movies.csv")): movies_list.append(i) data1 = genres_global data2 = actors_global data3 = directors_global insert_directors = """ INSERT INTO directors ( id, director_full_name) VALUES (?, ?)""" cursor.executemany(insert_directors, generate_director()) insert_actors = """ INSERT INTO actors ( id, actor_full_name) VALUES (?, ?)""" cursor.executemany(insert_actors, generate_actor()) insert_genres = """ INSERT INTO genres ( id, genre_name) VALUES (?, ?)""" cursor.executemany(insert_genres, generate_genre()) insert_movies = """ INSERT INTO movies ( id, title, release_year, rank, description, runtime_minutes, director_id) VALUES (?, ?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, movies_list) insert_movie_genre = """ INSERT INTO movie_genre ( id, movie_id, genre_id) VALUES (?, ?, ?) """ cursor.executemany(insert_movie_genre, generate_genre_movie_relationship()) insert_movie_actor = """ INSERT INTO movie_actor ( id, movie_id, actor_id) VALUES (?, ?, ?) """ cursor.executemany(insert_movie_actor, generate_actor_movie_relationship()) conn.commit() conn.close()
def copy_files_to_db( table: sa.Table, file_paths: FilePaths, engine: Engine ): columns = extract_table_columns(table, exclude_autoincrement=True) # Extracting the underlying Psycopg2 connection to access # bulk loading features not exposed by SQLAlchemy db_conn = engine.raw_connection() with db_conn.cursor() as cursor: for path in file_paths: logger.info(f"Loading from file: {path}") with open(path, 'r') as file: cursor.copy_expert(f""" COPY {table.name} ( {','.join(columns)} ) FROM STDIN WITH CSV HEADER """, file) db_conn.commit() logger.info(f"Files loaded to table: '{table.name}'")
def iterate_csv_dir(db_engine: engine.Engine = None): current_dir = os.getcwd() db_config = config.DB() for _, services_dir, _ in os.walk(f'{current_dir}/csv'): for service_db in services_dir: db_engine = create_engine(db_config.print()) db_connection = db_engine.raw_connection() try: db_cursor = db_connection.cursor() db_cursor.execute( f"SELECT 1 FROM pg_catalog.pg_database WHERE datname = '{service_db}'" ) db_exists = db_cursor.fetchone() if not db_exists: db_cursor.execute(f'CREATE DATABASE {service_db}') db_cursor.close() db_connection.commit() finally: db_connection.close() db_engine.dispose() db_engine = create_engine(db_config.print(service_db)) for filename in os.listdir(f'{current_dir}/csv/{service_db}'): filename = os.path.splitext(filename)[0] from_csv_to_db(service_db, filename, db_engine) db_engine.dispose()
def write_to_db(self, eng: Engine): conn = eng.raw_connection() cursor = conn.cursor() try: cursor.callproc("create_class_section", [self.crn, self.class_dept, self.class_number, self.professor, self.capacity, self.registered, self.semester_id]) for meeting_time in self.meeting_times: meeting_time.write_to_db(cursor) for restriction in self.restrictions: restriction.write_to_db(cursor) except err.IntegrityError as e: code = e.args[0] if code == 1062: print("IGNORING.") else: conn.rollback() print("ERROR CREATING SECTION", e) except err.InternalError as e: conn.rollback() print("ERROR CREATING SECTION", e) finally: conn.commit() cursor.close() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() insert_movies = """ INSERT INTO movies ( id, title, release_year, description, runtime_minutes, director) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, movie_record_generator(data_path)) insert_genres = """ INSERT INTO genres ( name) VALUES (?)""" cursor.executemany(insert_genres, genre_record_generator(data_path)) insert_actors = """ INSERT INTO actors ( name) VALUES (?)""" cursor.executemany(insert_actors, actor_record_generator(data_path)) insert_directors = """ INSERT INTO directors ( name) VALUES (?)""" cursor.executemany(insert_directors, director_record_generator(data_path)) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() # keep insert_reviews = """ INSERT INTO review ( ID, username, movie_id, review, rating) VALUES (?, ?, ?, ?, ?)""" cursor.executemany(insert_reviews, generic_generator(os.path.join(data_path, 'reviews.csv'))) # keep insert_users = """ INSERT INTO users ( ID, username, password) VALUES (?, ?, ?)""" cursor.executemany(insert_users, generic_generator(os.path.join(data_path, 'users.csv'))) # keep insert_movies = """ INSERT INTO movies ( Rank, Title, Genre, Description, Director, Actors, Year, Runtime, Rating, Votes, Revenue, Metascore) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, generic_generator(os.path.join(data_path, 'Data1000Movies.csv'))) # sqlite3.OperationalError: near "(": syntax error conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global directors, genres, actors directors = dict() genres = dict() actors = dict() csv_processor(os.path.join(data_path, 'Data1000Movies.csv')) insert_directors = """ INSERT INTO directors (id, director_full_name) VALUES (?, ?)""" cursor.executemany(insert_directors, director_generator()) insert_genres = """ INSERT INTO genres (id, genre_name) VALUES (?, ?)""" cursor.executemany(insert_genres, genre_generator()) insert_actors = """ INSERT INTO actors (id, actor_full_name) VALUES (?, ?)""" cursor.executemany(insert_actors, actor_generator()) insert_movies = """ INSERT INTO movies (id, title, release_year, description, director_id, runtime_minutes, rating, votes, revenue_in_millions, metascore) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, movie_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_movie_actors = """ INSERT INTO movie_actors (id, movie_id, actor_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_actors, movie_actors_generator()) insert_movie_genres = """ INSERT INTO movie_genres (id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) default_review = [1, 1, 'GOTG is my new favourite movie of all time!', 10, datetime.now(), 1] insert_reviews = """ INSERT INTO reviews (id, movie_id, review_text, rating, timestamp, user_id) VALUES (?, ?, ?, ?, ?, ?)""" cursor.execute(insert_reviews, default_review) default_user = [1, 'nton939', generate_password_hash('nton939Password'), 121] insert_users = """ INSERT INTO users (id, username, password, time_spent_watching_movies_minutes) VALUES (?, ?, ?, ?)""" cursor.execute(insert_users, default_user) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global tags tags = dict() global actors actors = dict() insert_movies = """ INSERT INTO movies ( id, release_year, title, description, director_full_name, rating, rating_count) VALUES (?, ?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, movie_record_generator(data_path)) insert_tags = """ INSERT INTO tags ( id, genre_name) VALUES (?, ?)""" cursor.executemany(insert_tags, get_tag_records()) insert_actors = """ INSERT INTO actors ( id, actor_full_name) VALUES (?, ?)""" cursor.executemany(insert_actors, get_actor_records()) insert_movie_tags = """ INSERT INTO movie_tags ( id, movie_id, tag_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_tags, movie_tags_generator()) insert_movie_actors = """ INSERT INTO movie_actors ( id, movie_id, actor_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_actors, movie_actors_generator()) # insert_users = """ # INSERT INTO users ( # id, username, password) # VALUES (?, ?, ?)""" # cursor.executemany(insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) # insert_comments = """ # INSERT INTO comments ( # id, user_id, article_id, comment, timestamp) # VALUES (?, ?, ?, ?, ?)""" # cursor.executemany(insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany(insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() insert_movie = """ INSERT INTO movie ( id, title, genre, description, director, actors, release_year, runtime_minutes, rating, votes, revenue) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movie, movie_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global genres genres = dict() global actors actors = dict() global director insert_movies = """ INSERT INTO movies ( rank, year, title, director, genres, actors) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_genres = """ INSERT INTO genres ( id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_movie_genres = """ INSERT INTO movie_genres ( id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_reviews = """ INSERT INTO reviews ( id, user_id, movie_id, review, timestamp) VALUES (?, ?, ?, ?, ?)""" cursor.executemany( insert_reviews, generic_generator(os.path.join(data_path, 'reviews.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global genres global directors global actors genres = dict() directors = dict() actors = dict() insert_movies = """ INSERT INTO movies ( id, title, description, director_id, year, runtime, first_letter) VALUES (?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'movies.csv'))) insert_genres = """ INSERT INTO genres (id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_movie_genres = """ INSERT INTO movie_genres (id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) insert_directors = """ INSERT INTO directors (id, fullname) VALUES (?, ?)""" cursor.executemany(insert_directors, get_director_records()) insert_actors = """ INSERT INTO actors (id, fullname) VALUES (?, ?)""" cursor.executemany(insert_actors, get_actor_records()) insert_movie_actors = """ INSERT INTO movie_actors (id, movie_id, actor_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_actors, movie_actors_generator()) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global tags tags = dict() insert_articles = """ INSERT INTO articles ( id, date, title, first_para, hyperlink, image_hyperlink) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_articles, article_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_tags = """ INSERT INTO tags ( id, name) VALUES (?, ?)""" cursor.executemany(insert_tags, get_tag_records()) insert_article_tags = """ INSERT INTO article_tags ( id, article_id, tag_id) VALUES (?, ?, ?)""" cursor.executemany(insert_article_tags, article_tags_generator()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_comments = """ INSERT INTO comments ( id, user_id, article_id, comment, timestamp) VALUES (?, ?, ?, ?, ?)""" cursor.executemany( insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global genres genres = dict() insert_movies = """ INSERT INTO movies ( date, title, first_para, hyperlink, image_hyperlink, id, rating, back_hyperlink, runtime, director, actors) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_genres = """ INSERT INTO genres ( id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_movie_genres = """ INSERT INTO movie_genres ( id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_reviews = """ INSERT INTO reviews ( id, user_id, movie_id, review, timestamp, rating) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_reviews, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global tags tags = dict() insert_articles = """ INSERT INTO articles ( id, title, genres, description, director, actors, release_year, runtime, rating, votes, revenue, metascore) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_articles, article_record_generator(os.path.join(data_path, 'news_articles.csv'))) insert_tags = """ INSERT INTO tags ( id, name) VALUES (?, ?)""" cursor.executemany(insert_tags, get_tag_records()) insert_article_tags = """ INSERT INTO article_tags ( id, article_id, tag_id) VALUES (?, ?, ?)""" cursor.executemany(insert_article_tags, article_tags_generator()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_comments = """ INSERT INTO comments ( id, user_id, article_id, review_text, rating) VALUES (?, ?, ?, ?, ?)""" cursor.executemany( insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close()
def write_to_db(self, eng: Engine, for_course: Course = None): conn = eng.raw_connection() cursor = conn.cursor() try: gid = -1 for course in self.courses: cursor.callproc("create_coreq_for_class", [ for_course.c_subject, for_course.c_number, course.c_subject, course.c_number, gid ]) for r in cursor.fetchall(): gid = r[0] except exc.IntegrityError as e: print("ERROR CREATING COREQS", e) finally: conn.commit() cursor.close() conn.close()
def populate(engine: Engine, session_factory, data_path, data_filename): conn = engine.raw_connection() cursor = conn.cursor() conn.commit() conn.close() insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_comments = """ INSERT INTO comments ( id, user_id, movie_id, comment, timestamp) VALUES (?, ?, ?, ?, ?)""" cursor.executemany( insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close() filename = os.path.join(data_path, data_filename) movie_file_reader = MovieFileCSVReader(filename) movie_file_reader.read_csv_file() session = session_factory() for movie in movie_file_reader.dataset_of_movies: print(movie) session.add(movie) for actor in movie_file_reader.dataset_of_actors: session.add(actor) for genre in movie_file_reader.dataset_of_genres: session.add(genre) for director in movie_file_reader.dataset_of_directors: session.add(director) session.commit()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global genres genres = dict() insert_movies = """ INSERT INTO movies ( id, title, year, description, runtime, ratings) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, generic_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_genres = """ INSERT INTO genres ( id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_movie_genres = """ INSERT INTO movie_genres ( id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) """ insert_users = "" INSERT INTO users ( id, username, password) VALUES (?, ?, ?)"" cursor.executemany(insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_comments = "" INSERT INTO comments ( id, user_id, article_id, comment, timestamp) VALUES (?, ?, ?, ?, ?)"" cursor.executemany(insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) """ conn.commit() conn.close()
def pg_execute_values(eng: Engine, sql: str, records: List[Dict], page_size: int = 5000): conn = eng.raw_connection() try: with conn.cursor() as curs: execute_values( curs, sql, records, template=None, page_size=page_size, ) conn.commit() except Exception as e: conn.rollback() raise e finally: conn.close()
def populate(engine: Engine, data_path: str): connect = engine.raw_connection() cursor = connect.cursor() insert_movies = """ INSERT INTO movies ( id,title,description,time,runtime,rating,votes,revenue,metascore) VALUES ( ?,?,?,?,?,?,?,?,?)""" cursor.executemany(insert_movies, movie_record_generator(data_path)) insert_directors = """ INSERT INTO directors ( movie,director_full_name,rating,rating_num) VALUES (?,?,0,0)""" cursor.executemany(insert_directors, director_record_generator(data_path)) genre_record_generator(data_path, cursor) actor_record_generator(data_path, cursor) connect.commit() connect.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() insert_movies = """ INSERT INTO movies ( id, title, release_date, rank, description, director, actors, genres) VALUES (?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_users = """ INSERT INTO users( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user())) conn.commit() conn.close()
def write_to_db(self, eng: Engine, for_course: Course, join_type, parent_group=-1, conn=None, cursor=None): p_group = parent_group if len(self.groups) > 0: if isinstance(cursor, type(None)): conn = eng.raw_connection() cursor = conn.cursor() try: cursor.callproc("create_group_prereq_for_class", [ for_course.c_subject if p_group == -1 else None, for_course.c_number if p_group == -1 else None, self.join_with if join_type is "" else join_type, p_group ]) for r in cursor.fetchall(): p_group = r[0] for group in self.groups: group.write_to_db(eng, for_course, self.join_with, p_group, conn, cursor) except err.IntegrityError as e: code = e.args[0] if code == 1062: print("IGNORING.") else: conn.rollback() print("ERROR CREATING PREREQS", e) finally: if parent_group is -1: conn.commit() cursor.close() conn.close()
def ping(engine: Engine) -> bool: with closing(engine.raw_connection()) as conn: return engine.dialect.do_ping(conn)
def populate(engine: Engine, data_path: str): connection = engine.raw_connection() cursor = connection.cursor() global genres global actors global directors genres = dict() actors = dict() directors = dict() insert_movies = """ INSERT INTO movies (id, title, description, director, release_year, runtime_minutes, revenue) VALUES(?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'movies.csv'))) insert_genres = """ INSERT INTO genres(id, name) VALUES(?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_actors = """ INSERT INTO actors(id, name) VALUES(?, ?)""" cursor.executemany(insert_actors, get_actor_records()) insert_directors = """ INSERT INTO directors(id, name) VALUES(?, ?)""" cursor.executemany(insert_directors, get_director_records()) insert_users = """ INSERT INTO users( id, username, password) VALUES(?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_reviews = """ INSERT INTO reviews( id, user_id, movie_id, review_text, ratings, timestamp) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_reviews, generic_generator(os.path.join(data_path, 'reviews.csv'))) insert_movie_genres = """ INSERT INTO movie_genres( id, movie_id, genre_id) VALUES(?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) insert_movie_actors = """ INSERT INTO movie_actors( id, movie_id, actor_id) VALUES(?, ?, ?)""" cursor.executemany(insert_movie_actors, movie_actors_generator()) # insert_movie_directors = """ # INSERT INTO movie_directors( # id, movie_id, director_id) # VALUES(?, ?, ?)""" # cursor.executemany(insert_movie_directors, movie_directors_generator()) connection.commit() connection.close()
def populate(db_engine: Engine, data_path: str): conn = db_engine.raw_connection() cursor = conn.cursor() # file_reader = MovieFileCSVReader(path_join(data_path, 'Data1000Movies.csv')) # file_reader.read_csv_file() with open(path_join(data_path, 'Data1000Movies.csv'), encoding='utf-8-sig') as file_data: reader = csv.DictReader(file_data) for row in reader: temp_str = row['Director'].strip().replace("'", "''") result = cursor.execute( f"SELECT id FROM directors WHERE full_name = '{temp_str}'" ).fetchone() if not result: cursor.execute( f"INSERT INTO directors (full_name) VALUES ('{temp_str}')") conn.commit() result = cursor.execute( f"SELECT id FROM directors WHERE full_name = '{temp_str}'" ).fetchone() # print(result) if row['Runtime (Minutes)'].isdigit(): movie_runtime = int(row['Runtime (Minutes)']) else: movie_runtime = 0 if row['Year'].isdigit(): release_year = int(row['Year']) else: release_year = 0 movie_title = row['Title'].replace("'", "''") movie_description = row['Description'].replace("'", "''") cursor.execute( f"INSERT INTO movies (title, release_year, description, director_id, runtime_minutes) VALUES ('{movie_title}', {release_year}, '{movie_description}', {result[0]}, {movie_runtime})" ) conn.commit() movie_index = cursor.execute( f"SELECT id FROM movies WHERE title = '{movie_title}' AND release_year = {release_year}" ).fetchone() genres = [i.strip() for i in row['Genre'].split(',')] for item in genres: result = cursor.execute( f"SELECT id FROM genres WHERE name = '{item}'").fetchone() if not result: cursor.execute( f"INSERT INTO genres (name) VALUES ('{item}')") conn.commit() result = cursor.execute( f"SELECT id FROM genres WHERE name = '{item}'" ).fetchone() cursor.execute( f"INSERT INTO movies_genres (movie_id, genre_id) VALUES ({movie_index[0]}, {result[0]})" ) conn.commit() actors = [ i.strip().replace("'", "''") for i in row['Actors'].split(',') ] for item in actors: result = cursor.execute( f"SELECT id FROM actors WHERE full_name = '{item}'" ).fetchone() if not result: cursor.execute( f"INSERT INTO actors (full_name) VALUES ('{item}')") conn.commit() result = cursor.execute( f"SELECT id FROM actors WHERE full_name = '{item}'" ).fetchone() cursor.execute( f"INSERT INTO movies_actors (movie_id, actor_id) VALUES ({movie_index[0]}, {result[0]})" ) conn.commit() with open(path_join(data_path, 'user_data.csv'), encoding='utf-8-sig') as file_data: reader = csv.DictReader(file_data) for row in reader: username = row['username'].strip().replace("'", "''") pass_hash = row['password_hash'].strip().replace("'", "''") if row['time_spent_watching_movies'].isdigit(): time_spent = int(row['time_spent_watching_movies'].strip()) else: time_spent = 0 cursor.execute( f"INSERT INTO users (username, password, time_spent_watching_movies) VALUES ('{username}', '{pass_hash}', {time_spent})" ) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() populate_from_movie_csv(cursor, data_path) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() global directors directors = dict() global genres genres = dict() global actors actors = dict() insert_movies = """ INSERT INTO movies ( rank,title,genre,description,director,actors,year,runtime,rating,votes,revenue,metascore) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" cursor.executemany( insert_movies, movie_record_generator(os.path.join(data_path, 'Data1000Movies.csv'))) insert_directors = """ INSERT INTO directors ( id, name) VALUES (?, ?)""" cursor.executemany(insert_directors, get_director_records()) insert_genres = """ INSERT INTO genres ( id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, get_genre_records()) insert_actors = """ INSERT INTO actors ( id, name) VALUES (?, ?)""" cursor.executemany(insert_actors, get_actor_records()) insert_movie_directors = """ INSERT INTO movie_directors ( id, movie_rank, director_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_directors, movie_directors_generator()) insert_movie_genres = """ INSERT INTO movie_genres ( id, movie_rank, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genres, movie_genres_generator()) insert_movie_actors = """ INSERT INTO movie_actors ( id, movie_rank, actor_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_actors, movie_actors_generator()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany( insert_users, generic_generator(os.path.join(data_path, 'users.csv'), process_user)) insert_comments = """ INSERT INTO comments ( id, user_id, movie_rank, comment, timestamp) VALUES (?, ?, ?, ?, ?)""" cursor.executemany( insert_comments, generic_generator(os.path.join(data_path, 'comments.csv'))) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): conn = engine.raw_connection() cursor = conn.cursor() reader = MovieFileCSVReader(os.path.join(data_path, 'data1000Movies.csv')) reader.read_csv_file() insert_directors = """INSERT INTO directors (name) VALUES (?)""" directors = [] for director in reader.dataset_of_directors: director_tuple = tuple([director.director_full_name]) directors.append(director_tuple) cursor.executemany(insert_directors, directors) insert_actors = """INSERT INTO actors (name) VALUES (?)""" actors = [] for actor in reader.dataset_of_actors: actor_tuple = tuple([actor.actor_full_name]) actors.append(actor_tuple) cursor.executemany(insert_actors, actors) insert_genres = """INSERT INTO genres (name) VALUES (?)""" genres = [] for genre in reader.dataset_of_genres: genre_tuple = tuple([genre.genre_name]) genres.append(genre_tuple) cursor.executemany(insert_genres, genres) insert_movies = """ INSERT INTO movies ( title, release_year, description, director_id, runtime_minutes) VALUES (?, ?, ?, ?, ?)""" movies = [] for movie in reader.dataset_of_movies: movie_director_id = cursor.execute( 'SELECT id FROM directors WHERE name = "' + movie.director.director_full_name + '"' ).fetchone()[0] movie_tuple = (movie.title, movie.release_year, movie.description, movie_director_id, movie.runtime_minutes) movies.append(movie_tuple) cursor.executemany(insert_movies, movies) insert_movie_actors = """INSERT INTO movie_actors (movie_id, actor_id) VALUES (?, ?)""" insert_movie_genres = """INSERT INTO movie_genres (movie_id, genre_id) VALUES (?, ?)""" insert_actor_colleagues = """INSERT INTO actor_colleagues (actor_id, colleague_id) VALUES (?, ?)""" movie_actors = [] movie_genres = [] actor_colleagues = [] for movie in reader.dataset_of_movies: movie_id = cursor.execute( 'SELECT id FROM movies ' 'WHERE title = "' + movie.title + '" AND release_year = "' + str(movie.release_year) + '"' ).fetchone()[0] for actor in movie.actors: actor_id = cursor.execute( 'SELECT id FROM actors WHERE name = "' + actor.actor_full_name + '"' ).fetchone()[0] movie_actors.append((movie_id, actor_id)) for colleague in movie.actors: if not actor.check_if_this_actor_worked_with(colleague) and actor != colleague: colleague_id = cursor.execute( 'SELECT id FROM actors WHERE name = "' + colleague.actor_full_name + '"' ).fetchone()[0] actor_colleague_tuple = (actor_id, colleague_id) actor_colleagues.append(actor_colleague_tuple) for genre in movie.genres: genre_id = cursor.execute( 'SELECT id FROM genres WHERE name = "' + genre.genre_name + '"' ).fetchone()[0] movie_genres.append((movie_id, genre_id)) cursor.executemany(insert_movie_actors, movie_actors) cursor.executemany(insert_movie_genres, movie_genres) cursor.executemany(insert_actor_colleagues, actor_colleagues) # user = User('Myles Kennedy', '123') # movie1 = Movie('Inception', 2010) # review = Review(movie1, "Absolutely incredible movie!", 10) # user.add_review(review) # movie2 = Movie("The Da Vinci Code", 2006) # user.watchlist.add_movie(movie1) # user.watchlist.add_movie(movie2) # cursor.execute( # "INSERT INTO users (user_name, password, time_spent_watching_movies_minutes) VALUES (?, ?, 0)", # (user.user_name, generate_password_hash(user.password)) # ) conn.commit() conn.close()
def populate(engine: Engine, data_path: str): movie_filename = os.path.join(data_path, 'Data1000Movies.csv') movie_comment_filename = os.path.join(data_path, 'MovieComment.csv') director_comment_filename = os.path.join(data_path, 'DirectorComment.csv') actor_comment_filename = os.path.join(data_path, 'ActorComment.csv') user_filename = os.path.join(data_path, 'user.csv') conn = engine.raw_connection() cursor = conn.cursor() global all_directors global all_genres global all_actors all_directors = list() all_genres = dict() all_actors = dict() insert_movies = """ INSERT INTO movies ( id, title, release_year, description, director, runtime_minutes) VALUES (?, ?, ?, ?, ?, ?)""" cursor.executemany(insert_movies, generate_movies(movie_filename)) insert_directors = """ INSERT INTO directors ( id, name) VALUES (?, ?)""" cursor.executemany(insert_directors, generate_director()) insert_actors = """ INSERT INTO actors ( id, name) VALUES (?, ?)""" cursor.executemany(insert_actors, generate_actor()) insert_genres = """ INSERT INTO genres ( id, name) VALUES (?, ?)""" cursor.executemany(insert_genres, generate_genre()) insert_users = """ INSERT INTO users ( id, username, password) VALUES (?, ?, ?)""" cursor.executemany(insert_users, generate_users(user_filename)) insert_movie_genre = """ INSERT INTO movie_genres ( id, movie_id, genre_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_genre, generate_movie_genres()) insert_movie_actor = """ INSERT INTO movie_actors ( id, movie_id, actor_id) VALUES (?, ?, ?)""" cursor.executemany(insert_movie_actor, generate_movie_actors()) insert_movie_comments = """ INSERT INTO movie_comments ( id, username, name, timestamp, text) VALUES (?, ?, ?, ?, ?)""" cursor.executemany(insert_movie_comments, generate_movie_comments(movie_comment_filename)) insert_director_comments = """ INSERT INTO director_comments ( id, username, name, timestamp, text) VALUES (?, ?, ?, ?, ?)""" cursor.executemany(insert_director_comments, generate_director_comments(director_comment_filename)) insert_actor_comments = """ INSERT INTO actor_comments ( id, username, name, timestamp, text) VALUES (?, ?, ?, ?, ?)""" cursor.executemany(insert_actor_comments, generate_actor_comments(actor_comment_filename)) conn.commit() conn.close()