Пример #1
0
def define_Controversy(db_loc):
    from Scripts import db_ec
    contro_table_stat = '''CREATE TABLE IF NOT EXISTS contRatings(
                            movieId integer,
                            ratio real,
                            PolarFreq real,
                            TotalVotes integer,
                            AvgRating real,
                            
                            
                            primary key (movieId),
                            foreign key (movieId) references ratings(movieId),
                            foreign key (movieId) references metadata(ratingId));'''

    calc_stat_data = '''SELECT t.movieId, t.HighCount*1.0/t.LowCount as Ratio, 
                            ((t.HighCount + t.LowCount)*1.0/t.TotalCount) as PolarFreq, 
                            t.TotalCount, t.AvgRating
                     FROM (SELECT movieId, 
                          COUNT(CASE WHEN rating <= 1 THEN rating END) as LowCount,
                          COUNT(CASE WHEN rating > 4 THEN rating END) as HighCount,
                          COUNT(rating) as TotalCount,
                          AVG(rating) as AvgRating
                          
                          FROM ratings
                          GROUP BY movieId)  t
                    ;
                  '''

    try:
        conn = db_ec.connect_db(db_loc)
        db_ec.create_table(conn, contro_table_stat)
        conn.commit()
    except:
        print("Error creating table")

    finally:
        conn.close()

    ##load controversy table

    try:
        conn = db_ec.connect_db(db_loc)
        cur = conn.cursor()

        cont_dat = cur.execute(calc_stat_data)

        cur.executemany(
            'INSERT OR REPLACE INTO contRatings(movieId,ratio,PolarFreq,TotalVotes, AvgRating) values (?,?,?,?,?)',
            cont_dat.fetchall())
        conn.commit()
    finally:
        conn.close()
Пример #2
0
def movieRatings_load(db_loc, rating_csv_loc):

    from Scripts import db_ec
    ratings_stat = '''CREATE TABLE IF NOT EXISTS ratings(
                            userID integer,
                            movieId integer,
                            rating real,
                            timestamp integer,
                            
                            primary key(userID,movieID, timestamp))
                            
                            ;                    
                            '''
    try:
        conn = db_ec.connect_db(db_loc)
        db_ec.create_table(conn, ratings_stat)
    except:
        print("Error creating table 2")

    finally:
        conn.close()

    ##load data from clean ratings data
    try:
        conn = db_ec.connect_db(db_loc)
        cur = conn.cursor()

        with open(rating_csv_loc, 'r') as myfile:
            reader = csv.DictReader(myfile)

            to_db = [(i['userId'], i['movieId'], i['rating'], i['timestamp'])
                     for i in reader]

            cur.executemany(
                '''INSERT OR REPLACE into ratings(userID,movieID,rating,timestamp) 
                            VALUES(?,?,?,?);''', to_db)

            conn.commit()

    except:
        print('Error loading ratings')
    finally:
        conn.close()
Пример #3
0
def movieKeywords_load(db_loc, keyword_csv_loc):

    from Scripts import db_ec
    keyword_stat = '''CREATE TABLE IF NOT EXISTS keywords(
                            id integer, 
                            keywords text,
                            
                            primary key(id),
                            foreign key(id) references metadata(id)
                            );
                            '''
    try:
        conn = db_ec.connect_db(db_loc)
        db_ec.create_table(conn, keyword_stat)
    except:
        print("Error creating table")

    finally:
        conn.close()

    try:
        conn = db_ec.connect_db(db_loc)
        cur = conn.cursor()

        with open(keyword_csv_loc, 'r', encoding='utf8') as myfile:
            reader = csv.DictReader(myfile)

            to_db = [(i['id'], i['keywords']) for i in reader]

            cur.executemany(
                '''INSERT OR REPLACE into keywords(id,keywords) 
                            VALUES(?,?);''', to_db)

            conn.commit()

    except:
        print('Error loading keywords')
    finally:
        conn.close()
Пример #4
0
def movieCSVcur(db_loc, statement, output):

    from Scripts import db_ec
    conn = db_ec.connect_db(db_loc)
    cur = conn.cursor()

    cur.execute(statement)
    with open(output, 'w', encoding='utf8') as myfile:
        writer = csv.writer(myfile)
        writer.writerow(i[0] for i in cur.description)
        writer.writerows(cur)

    conn.close()
Пример #5
0
def movieMeta_load(db_loc, meta_csv_loc):
    from Scripts import db_ec

    meta_stat = '''CREATE TABLE IF NOT EXISTS metadata(
                            adult text, 
                            belongs_to_collection text,
                            budget integer,
                            genres text,
                            homepage text,
                            id integer,
                            imdb_id text,
                            original_language text,
                            original_title text,
                            overview text,
                            popularity real,
                            poster_path text,
                            production_companies text,
                            production_countries text,
                            release_date text,
                            revenue real,
                            runtime real,
                            spoken_languages text,
                            status text,
                            tagline text,
                            title text,
                            video text, 
                            vote_average real,
                            vote_count real,
                            ratingId integer,
                            
                            primary key(id),
                            foreign key (ratingId) references ratings(movieId))                       
                            ;
                    '''

    try:
        conn = db_ec.connect_db(db_loc)
        db_ec.create_table(conn, meta_stat)
    except:
        print("Error creating metadata table")
    finally:
        conn.close()

    ##load
    try:
        conn = db_ec.connect_db(db_loc)
        cur = conn.cursor()

        records = cur.execute('SELECT * FROM metadata;')
        #clear current records
        if records:
            cur.execute("DELETE FROM metadata;")
            conn.commit()

        with open(meta_csv_loc, 'r', encoding="utf8") as myfile:

            reader = csv.DictReader(myfile)

            to_db = [
                (i['adult'], i['belongs_to_collection'], i['budget'],
                 i['genres'], i['homepage'], i["id"], i["imdb_id"],
                 i["original_language"], i["original_title"], i["overview"],
                 i["popularity"], i["poster_path"], i["production_companies"],
                 i["production_countries"], i["release_date"], i["revenue"],
                 i["runtime"], i["spoken_languages"], i["status"],
                 i["tagline"], i["title"], i["video"], i["vote_average"],
                 i["vote_count"], i["ratingId"]) for i in reader
            ]

            cur.executemany(
                '''INSERT OR REPLACE INTO metadata (adult, belongs_to_collection,budget,genres, homepage, id,
                            imdb_id, original_language,original_title,overview,popularity,poster_path,
                            production_companies,production_countries,release_date,revenue,runtime,spoken_languages,
                            status,tagline,title,video,vote_average,vote_count, ratingId) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);
                             ''', to_db)

        conn.commit()
    except:
        print("Error loading DB")
    finally:
        conn.close()
Пример #6
0
    (dat['PolarFreq'] > PolarRating), 1, 0)

print("Contrversial Ratio set between " + str(ratio) + " and " +
      str(1 / ratio))
print("Polar Voting must make up  " + str(PolarRating) +
      " percentile of ratings")
##add rewrites
dat['Rewrite'] = np.where(dat['title'] != dat['original_title'], 1, 0)

##pull keywords from movies after 1994 and with more than 30 votes

stat = '''SELECT k.id, k.keywords
            FROM metadata m, keywords k, contratings r
            WHERE k.id = m.id AND r.movieId = m.ratingId AND m.release_date > '1994-12-31' AND r.TotalVotes > 30;'''

conn = db_ec.connect_db(db_loc)
keywords_Df = pd.read_sql_query(stat, conn)

print("Keywords pulled from database")


##format from json to list
def get_key_name_list(x):
    if isinstance(x, list):
        names = [i['name'] for i in x]
        return names
    return []


keywords_Df['keywords'] = keywords_Df['keywords'].apply(literal_eval)
keywords_Df['keywords'] = keywords_Df['keywords'].apply(get_key_name_list)