Пример #1
0
def save_all(folder, li):
    """Re-create and save every dataset movie whose id is listed in ``li``.

    Reads ``DATASET`` line by line. For each line whose first
    ``'|'``-separated field is contained in ``li``, the line (with its
    trailing newline removed) is passed to ``Movie`` and the resulting
    object is saved via ``movie.save(folder)``.

    Args:
        folder: Passed unchanged to ``Movie.save``.
        li: Collection of movie ids, compared against the first field of
            each dataset line (a string).
    """
    # Build the lookup once: testing membership in a list is a linear scan
    # that would otherwise be repeated for every line of the dataset.
    try:
        wanted = frozenset(li)
    except TypeError:
        # Unhashable elements: keep the caller's container so the
        # membership test behaves exactly as before.
        wanted = li

    with open(DATASET) as dataset:
        for movie_line in dataset:
            if movie_line.split('|')[0] in wanted:
                movie = Movie(movie_line.rstrip('\n'))
                movie.save(folder)
Пример #2
0
def calc_distance(movie_id, folder, matrix):
    """Record ``movie - other`` for every dataset movie with a larger id.

    The reference movie is loaded with ``Movie.load(folder, movie_id)``.
    ``DATASET`` is then scanned; each line whose first ``'|'``-separated
    field, converted to ``int``, is greater than the reference movie's
    ``id.value`` is turned into a ``Movie``, subtracted from the reference
    movie, and the result is stored with
    ``matrix.update(reference_id, other_id, result)``.

    Args:
        movie_id: Id handed to ``Movie.load``.
        folder: Folder handed to ``Movie.load``.
        matrix: Object exposing ``update(id_a, id_b, value)``.
    """
    movie = Movie.load(folder, movie_id)
    with open(DATASET) as dataset:
        for line in dataset:
            other_id = line.partition('|')[0]
            # Only ids above the reference id are processed.
            if int(other_id) <= int(movie.id.value):
                continue
            other = Movie(line.rstrip('\n'))
            distance = movie - other
            # The other id is passed on as the string read from the line.
            matrix.update(movie.id.value, other_id, distance)
Пример #3
0
def add_all_ratings(folder):
    """Attach a ratings table to every movie in the dataset and re-save it.

    For each line of ``DATASET`` the first ``'|'``-separated field is taken
    as the movie id. The movie is loaded from ``folder``, its ratings file
    ``<TRAINING>/mv_<id left-padded with zeros to 7 chars>.txt`` is read
    with pandas (first row skipped, columns named ``user_id``, ``rating``,
    ``date``, with ``date`` parsed as dates), the frame is stored on
    ``movie.ratings`` and the movie is saved back to ``folder``.

    Args:
        folder: Folder passed to ``Movie.load`` and ``Movie.save``.
    """
    with open(DATASET) as dataset:
        for line in dataset:
            movie_id = line.partition('|')[0]
            movie = Movie.load(folder, movie_id)

            # Left-pad the id with zeros up to seven characters.
            padding = '0' * (7 - len(movie_id))
            path = '{0}/mv_{1}{2}.txt'.format(TRAINING, padding, movie_id)

            movie.ratings = pd.read_csv(
                path,
                header=None,
                names=['user_id', 'rating', 'date'],
                skiprows=1,
                parse_dates=['date'],
                infer_datetime_format=True,
            )
            movie.save(folder)
Пример #4
0
                movie = Movie.load(folder, movie_id)
                zeros = '0' * (7 - len(movie_id))
                ratings_file = '{0}/mv_{1}{2}.txt'.format(
                    TRAINING, zeros, movie_id)
                ratings_df = pd.read_csv(ratings_file,
                                         header=None,
                                         names=['user_id', 'rating', 'date'],
                                         skiprows=1,
                                         parse_dates=['date'],
                                         infer_datetime_format=True)
                movie.ratings = ratings_df
                movie.save(folder)


if __name__ == '__main__':
    # Repair pass: collect the ids of movies whose saved copy fails to load
    # with EOFError, then re-save those movies and re-attach their ratings.
    folder = '/media/mariam/Files/ran/clacket-save'
    li = []
    with open(DATASET) as source:
        for record in source:
            movie_id = record.partition('|')[0]
            try:
                # Only the success or failure of the load matters here.
                Movie.load(folder, movie_id)
            except EOFError:
                li.append(movie_id)

    print('Found {0} movies in need of fixing...'.format(len(li)))
    print(li)

    print("Saving...")
    save_all(folder, li)

    print("Adding ratings...")
    add_all_ratings(folder, li)
Пример #5
0
def save_all(folder):
    """Build a ``Movie`` from every line of ``DATASET`` and save it.

    Each line has its trailing newline removed before being passed to
    ``Movie``; the resulting object is saved with ``save(folder)``.

    Args:
        folder: Passed unchanged to ``Movie.save``.
    """
    with open(DATASET) as dataset:
        for line in dataset:
            Movie(line.rstrip('\n')).save(folder)
Пример #6
0
def valid_movie():
    """Return a ``Movie`` built from a fixed, ``'|'``-delimited sample record."""
    record = (
        "1|Dinosaur Planet|2003|"
        "Documentary,Animation,Family|"
        "Christian Slater,Scott Sampson|"
        "N/A|N/A|English|USA|series"
    )
    return Movie(record)