def save_all(folder, li):
    """Re-create and save every dataset movie whose id appears in ``li``.

    Reads ``DATASET`` line by line. For each line whose first
    ``'|'``-separated field is contained in ``li``, a ``Movie`` is built from
    the line (with the trailing newline removed) and ``save(folder)`` is
    called on it. Lines whose id is not in ``li`` are ignored.

    Args:
        folder: Passed unchanged to ``Movie.save``.
        li: Iterable of movie ids. They are compared against the string id
            taken from each dataset line, so they are expected to be strings
            (and therefore hashable).
    """
    # Build the lookup once: a list would be scanned linearly for every
    # dataset line, a frozenset answers each membership test in O(1).
    wanted_ids = frozenset(li)
    with open(DATASET) as dataset:
        for movie_line in dataset:
            # Only the first field is needed, so stop splitting after it.
            movie_id = movie_line.split('|', 1)[0]
            if movie_id in wanted_ids:
                Movie(movie_line.rstrip('\n')).save(folder)
def calc_distance(movie_id, folder, matrix):
    """Record the distance from one saved movie to every later dataset movie.

    Loads the movie identified by ``movie_id`` from ``folder`` and scans
    ``DATASET``. For every line whose id (the first ``'|'``-separated field)
    is numerically greater than the loaded movie's id, a ``Movie`` is built
    from that line, the distance is computed as ``movie - other``, and the
    result is stored with ``matrix.update(own_id, other_id, distance)``.

    Args:
        movie_id: Id handed to ``Movie.load``.
        folder: Folder handed to ``Movie.load``.
        matrix: Object providing ``update(id_a, id_b, distance)``. It receives
            the loaded movie's ``id.value`` and the other id as the string
            read from the dataset line.
    """
    movie = Movie.load(folder, movie_id)
    # The reference id is the same for the whole scan: read it and convert
    # it to int once instead of on every dataset line.
    own_id = movie.id.value
    own_id_number = int(own_id)
    with open(DATASET) as dataset:
        for movie_line in dataset:
            other_id = movie_line.split('|', 1)[0]
            # Only ids strictly greater than the reference id are compared.
            if int(other_id) <= own_id_number:
                continue
            other = Movie(movie_line.rstrip('\n'))
            matrix.update(own_id, other_id, movie - other)
def add_all_ratings(folder):
    """Attach a ratings table to every dataset movie and save it again.

    For each line of ``DATASET`` the movie id (first ``'|'``-separated field)
    is used to load the saved ``Movie`` from ``folder`` and to locate its
    ratings file ``<TRAINING>/mv_<id left-padded with zeros to 7 chars>.txt``.
    That file is read with pandas into a frame with the columns ``user_id``,
    ``rating`` and ``date`` (``date`` parsed as dates), assigned to
    ``movie.ratings``, and the movie is saved back to ``folder``.

    Args:
        folder: Folder passed to ``Movie.load`` and ``Movie.save``.
    """
    column_names = ('user_id', 'rating', 'date')
    with open(DATASET) as dataset:
        for line in dataset:
            movie_id = line.split('|')[0]
            movie = Movie.load(folder, movie_id)

            # Left-pad the id with zeros up to seven characters; ids that are
            # already that long (or longer) get no padding.
            padding = '0' * (7 - len(movie_id))
            ratings_path = '{0}/mv_{1}{2}.txt'.format(TRAINING, padding, movie_id)

            # skiprows=1 drops the first line of the ratings file
            # (presumably a per-file header line — confirm against the data).
            movie.ratings = pd.read_csv(
                ratings_path,
                header=None,
                names=list(column_names),
                skiprows=1,
                parse_dates=['date'],
                infer_datetime_format=True,
            )
            movie.save(folder)
# NOTE(review): the statements on the next line are the tail of a definition
# whose header lies outside this view (presumably a ratings-loading helper --
# confirm against the full file). They are kept exactly as found.
movie = Movie.load(folder, movie_id) zeros = '0' * (7 - len(movie_id)) ratings_file = '{0}/mv_{1}{2}.txt'.format( TRAINING, zeros, movie_id) ratings_df = pd.read_csv(ratings_file, header=None, names=['user_id', 'rating', 'date'], skiprows=1, parse_dates=['date'], infer_datetime_format=True) movie.ratings = ratings_df movie.save(folder)
# Script entry point: find the dataset movies whose saved copy cannot be
# loaded, then re-save them and re-attach their ratings.
if __name__ == '__main__':
    # Folder handed to Movie.load, save_all and add_all_ratings below.
    folder = '/media/mariam/Files/ran/clacket-save'
    # Ids of the movies for which Movie.load raised EOFError.
    li = []
    with open(DATASET) as dataset:
        for movie_line in dataset:
            # The id is the text before the first '|' of the dataset line.
            movie_id = movie_line.split('|')[0]
            try:
                # `m` is not used afterwards; the load is attempted only to
                # see whether it raises.
                m = Movie.load(folder, movie_id)
            except EOFError:
                # Presumably an empty or truncated save file -- TODO confirm.
                li.append(movie_id)
    print('Found {0} movies in need of fixing...'.format(len(li)))
    print(str(li))
    print("Saving...")
    save_all(folder, li)
    print("Adding ratings...")
    # NOTE(review): called with (folder, li); confirm that the
    # add_all_ratings definition in the full file accepts both arguments.
    add_all_ratings(folder, li)
def save_all(folder):
    """Build a ``Movie`` from every line of ``DATASET`` and save it.

    Each line has its trailing newline characters removed before it is
    passed to the ``Movie`` constructor; the resulting object is then saved
    with ``save(folder)``.

    Args:
        folder: Passed unchanged to ``Movie.save``.
    """
    with open(DATASET) as dataset:
        # Lazily turn each raw line into a Movie, one at a time, so that
        # construction and saving stay interleaved line by line.
        movies = (Movie(raw_line.rstrip('\n')) for raw_line in dataset)
        for movie in movies:
            movie.save(folder)
def valid_movie():
    """Return a ``Movie`` built from a fixed sample record.

    The record is the ``'|'``-separated string
    ``1|Dinosaur Planet|2003|Documentary,Animation,Family|``
    ``Christian Slater,Scott Sampson|N/A|N/A|English|USA|series``.
    """
    # One entry per '|'-separated field of the record, in order.
    fields = (
        "1",
        "Dinosaur Planet",
        "2003",
        "Documentary,Animation,Family",
        "Christian Slater,Scott Sampson",
        "N/A",
        "N/A",
        "English",
        "USA",
        "series",
    )
    return Movie("|".join(fields))