Exemplo n.º 1
0
def parseActorsAndMovies():    
    Actor.unpickleActors()
    if(not Actor.total):
        print "Reading all actor data from dataset..."
        inputFile = open("../dataset/actor_movies.txt")
        lineString = inputFile.readline()
        
        while(lineString):
            actor_name = "" 
            parsedLineArray = lineString.split("\t\t")
            
            for i in range(0,len(parsedLineArray)):
                if(i == 0):
                    actor_name = parsedLineArray[0].strip()
                    actor = Actor(actor_name)            
                else:            
                    if(parsedLineArray[i].strip() != ""):
                        (movieTitle,movieYear) = Movie.separateTitleAndYear(parsedLineArray[i].strip())             
                        movieActedIn = Movie.hashByTitle[movieTitle]                
                        #Set actor's movie list
                        actor.movieList.append(movieActedIn)
                        #Update actor's movieVector for current movie
                        actor.moviesVector[0][movieActedIn.id] = 1        
                        #Append current movie's actor list with current actor
                        movieActedIn.actors.append(actor)
            
            lineString = inputFile.readline()
            print actor
            #Append actor's moviesVector to the actorMovieMatrix
            if(Actor.total == 1):
                Actor.actorMovieAdjacencyMatrix = np.append(np.zeros((0,Movie.total)),actor.moviesVector,axis=0)
            else:
                Actor.actorMovieAdjacencyMatrix = np.append(Actor.actorMovieAdjacencyMatrix,actor.moviesVector,axis=0)
        Actor.pickleActors()
Exemplo n.º 2
0
def parseMoviesAndRatings():
    inputFile = open("../dataset/movie_rating.txt")    
    lineString = inputFile.readline()
    movieRatingVector = np.zeros(0)
    global missing_count
        
    while(lineString):
        (movie_title,movie_year) = Movie.separateTitleAndYear(lineString.split("\t\t")[0].strip())
        movie_rating = float(lineString.split("\t\t")[1].strip())        
        
        if(not movie_title in Movie.hashByTitle):
            movie = Movie(movie_title,rating=movie_rating)
            missing_count = missing_count + 1
        else:    
            movie = Movie.hashByTitle[movie_title]
        
        movie.rating = movie_rating
        
        movieRatingVector = np.append(movieRatingVector,movie_rating)
        
        lineString = inputFile.readline()     
                
        if(movie.id%100000 == 0):
            print movie      
    
    arrayToSave = np.asarray(movieRatingVector)
    np.savetxt("./movie_rating_vector.csv", arrayToSave, delimiter=",")