示例#1
0
def pickle_simple():
    """Pickle every movie referenced by the ``stars`` table as a list of dicts.

    The movie ids are fetched with ``IMDB.fetch_vec`` (distinct ids joined
    against ``title``, ordered by ascending production year). Each id is
    turned into a ``Movie``, converted with ``dict`` and the resulting list
    is dumped to ``../data/movies.pkl`` via ``pck.dump``.
    """
    query = ("SELECT DISTINCT movie_id FROM stars,title "
             "WHERE movie_id= title.id ORDER BY production_year ASC")
    db = IMDB()
    movie_ids = db.fetch_vec(query)

    # Materialise the whole list first; it is written in a single dump call.
    records = []
    for movie_id in movie_ids:
        records.append(dict(Movie(db, movie_id)))

    with open('../data/movies.pkl', 'wb') as out_file:
        pck.dump(records, out_file)
示例#2
0
文件: main.py 项目: sh1kn0z/IMDB-cli
def main():
    """Command-line entry point for an IMDB title search.

    Parses ``-t/--title`` (required, one or more values) and the optional
    ``--title_type`` filter, then drives the ``IMDB`` helper in sequence:
    ``build_query_string`` -> ``execute_query`` -> ``extract_data`` ->
    ``build_table``.
    """
    # Values accepted by --title_type.
    title_types = [
        'feature', 'tv_movie', 'tv_series', 'tv_episode',
        'tv_special', 'tv_miniseries', 'documentary',
        'video_game', 'short', 'video', 'tv_short'
    ]

    cli = argparse.ArgumentParser()
    cli.add_argument('-t', '--title', nargs='+', type=str, required=True,
                     help='Title of the movie')
    cli.add_argument('--title_type', type=str, nargs='+',
                     choices=title_types,
                     help='The Title type of the movie')
    options = cli.parse_args()

    imdb = IMDB()
    if options.title_type:
        # NOTE(review): this assigns on the IMDB class, not on the `imdb`
        # instance created above -- confirm that is intended.
        IMDB.title_type = options.title_type

    query_url = imdb.build_query_string(options.title, options.title_type)
    imdb.extract_data(imdb.execute_query(query_url))
    imdb.build_table()
示例#3
0
def create_movie_db():
    """Export every movie referenced by the ``stars`` table as a DataFrame.

    Movie ids are fetched with ``IMDB.fetch_vec`` (distinct ids joined against
    ``title``, ordered by ascending production year). For each id a ``Movie``
    is built and its value for every key in ``Movie._keys`` is collected into
    a column. The resulting frame, indexed by ``id``, is written to
    ``../data/movies.csv`` and ``../data/movies.pkl``.
    """
    conn = IMDB()
    ids = conn.fetch_vec("SELECT DISTINCT movie_id FROM stars,title "
                         "WHERE movie_id= title.id ORDER BY production_year ASC")

    # One list per column, filled row by row.
    table = {key: [] for key in Movie._keys}

    for mov_id in ids:
        mov = Movie(conn, mov_id)
        for key in Movie._keys:
            table[key].append(mov[key])

    # Select with Movie._keys to fix the column order.
    df = pd.DataFrame(table)[Movie._keys]
    # set_index returns a new frame (it is not in-place by default), so the
    # result must be rebound; previously it was discarded and the exported
    # files kept the default integer index.
    df = df.set_index('id')
    df.to_csv('../data/movies.csv')
    df.to_pickle('../data/movies.pkl')
示例#4
0
# Optional search filters: title, release year and genre.
parser.add_argument("-t", "--title", type=str)
parser.add_argument("-y", "--year", type=int)
parser.add_argument(
    "-g", "--genre", type=str,
    choices=[
        'Film-Noir', 'History', 'Biography', 'Fantasy',
        'Thriller', 'Comedy', 'Horror', 'Musical',
        'Drama', 'Mystery', 'Western', 'Music',
        'Animation', 'Sport', 'Crime', 'War', 'Family',
        'Sci-Fi', 'Action', 'Adventure', 'Romance',
    ])

args = parser.parse_args()

# Collect the search terms in this order: title, director terms, actor
# terms, year, genre. search_terms is the same list object as
# args.director, so the calls below mutate args.director as well.
search_terms = args.director
if args.title:
    search_terms.insert(0, args.title)
search_terms.extend(args.actor)
if args.year is not None:
    search_terms.append(str(args.year))
if args.genre is not None:
    search_terms.append(args.genre)

# Nothing to search for: print a hint and stop.
if not search_terms:
    print(
        "Please provide at least one search term (movie title, director, actor, genre, or year)"
    )
    print("Use --help for more info")
    sys.exit()

data = IMDB()
data.search(search_terms)
    def __init__(self, imdb_conn=None):
        """Store the IMDB connection, creating a new ``IMDB()`` if none is given.

        :param imdb_conn: connection object to reuse, or None to create one
        """
        if imdb_conn is None:
            imdb_conn = IMDB()
        self.imdb_conn = imdb_conn
class DataFileGenerator(object):
    """Writes movie vectors produced by a vectorizer to a CSV data file."""

    def __init__(self, imdb_conn=None):
        """
        :param imdb_conn: connection object to reuse; a new ``IMDB()`` is
            created when it is None
        """
        self.imdb_conn = IMDB() if imdb_conn is None else imdb_conn

    def fix(self, data_version_num=None):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def generate_csv(self, movie_vectorizer, movie_generator=None,
            limit=None):
        """
        Vectorize movies one by one and write them to ``data_raw.csv`` inside
        the directory returned by ``DataDirControl.create_version()``.

        Alongside the CSV, the directory receives ``about.txt`` (database and
        run summary), ``log.txt`` (exception and traceback per failed movie)
        and ``failed.txt`` (ids of failed movies). A movie whose vectorization
        or CSV write raises is counted as a failure and skipped; the run
        continues.

        :param movie_vectorizer: Instance of MovieVectorGenerator
        :param movie_generator: Generator of Movies; defaults to
            ``self.imdb_conn.get_all_movies()``
        :param limit: maximum number of movies to attempt; None means no
            limit, and a value <= 0 attempts none
        :return: path to data_raw file
        """
        # NOTE(review): the files below are opened in binary mode and written
        # with str objects, which is Python 2 semantics -- confirm before
        # running this under Python 3.
        data_dir_ctrl = DataDirControl(str(movie_vectorizer))
        start_time = monotonic()
        data_dir = data_dir_ctrl.create_version()
        with open(data_dir + "about.txt", 'wb') as about_fp:
            about_fp.write("db : {}\n".format(self.imdb_conn.db))

        if movie_generator is None:
            movie_generator = self.imdb_conn.get_all_movies()

        succ_num = 0
        fail_num = 0
        total = 0

        # Buffering 0: log, failure list and data are written unbuffered.
        with open(data_dir + "log.txt", 'wb', 0) as log_fp, \
                open(data_dir + "failed.txt", 'wb', 0) as fail_fp, \
                open(data_dir + "data_raw.csv", 'wb', 0) as data_fp:
            csv_writer = csv.writer(data_fp)
            csv_writer.writerow(['id'] + movie_vectorizer.header)

            for movie in movie_generator:
                # `total` counts attempted movies, so this stops after
                # `limit` attempts. limit=None used to be replaced by a
                # hard-coded 999999 cap; it now means unlimited.
                if limit is not None and total >= limit:
                    break
                total += 1
                try:
                    movie_vec = movie_vectorizer.get_vector(movie)
                    csv_writer.writerow([movie['id']] + movie_vec)
                    succ_num += 1
                except Exception as e:
                    # Deliberate best effort: record the failure and go on
                    # with the next movie.
                    fail_num += 1
                    log_fp.write(" {} : <{}> \n {} \n".format(movie['id'], e,
                        traceback.format_exc()))
                    fail_fp.write(str(movie['id']) + "\n")

        end_time = monotonic()
        # Append the run summary to the about file created above.
        with open(data_dir + "about.txt", 'ab') as res_fp:
            res_fp.writelines(["\n",
                "Runtime : {}\n".format(
                    timedelta(seconds=end_time - start_time)),
                "total movies : {}\n".format(total),
                "Success count : {} \n".format(succ_num),
                "Fail count : {} \n".format(fail_num)])
        # Parenthesized single-argument form prints the same text whether
        # print is a statement or a function.
        print("DONE")
        return data_dir + "data_raw.csv"