Exemplo n.º 1
0
def tfUserTag(userId):
    """Compute the tag term-frequency vector for one user.

    A ``User`` object is populated with every movie returned by
    ``di.getUserMovies(userId)``, and each movie with the (tag id, timestamp)
    rows returned by ``di.getMovieTags``.  For each unique tag reported by the
    user object, the score is the sum over the user's movies of

        (sum of time weights of that tag's occurrences) / (tags on the movie)

    Movies that carry no tags contribute nothing.

    :param userId: identifier handed to ``User`` and ``di.getUserMovies``.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> score mapping.
    """
    usrObj = User(userId)
    for movieRow in di.getUserMovies(userId):
        filmId = movieRow[0]
        # Rank only matters for actors, so a placeholder 0 is passed here.
        film = Movie(filmId, 0)
        for tagRow in di.getMovieTags(filmId):
            film.addTag(tagRow[0], tagRow[1])
        usrObj.addMovie(film)

    usrObj.setUnqTags()
    scores = {}
    for tagId in usrObj.getUnqTags():
        tagScore = 0
        for film in usrObj.getMovies():
            filmTags = list(film.getTags())
            tagCount = len(filmTags)
            matched = sum(t.getTimeWeight() for t in filmTags
                          if t.getId() == tagId)
            if tagCount:
                tagScore = tagScore + matched / tagCount
        scores[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(scores))
Exemplo n.º 2
0
def tfIdfActorTag(actorId, idfActVector):
    """Combine an actor's tag TF vector with a precomputed IDF vector.

    :param actorId: identifier forwarded to ``tfCalc.tfActorTag``.
    :param idfActVector: mapping indexed by tag; it must contain every tag
        present in the actor's TF vector, otherwise the lookup fails.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> tf * idf mapping.
    """
    tf = tfCalc.tfActorTag(actorId)
    weighted = {
        tag: tf[tag] * idfActVector[tag]
        for tag in tf.keys()
    }
    ranked = utils.sortByValue(weighted)
    return utils.normalizeVector(ranked)
Exemplo n.º 3
0
def tfIdfGenreTag(genre, idfGenVector):
    """Combine a genre's tag TF vector with a precomputed IDF vector.

    :param genre: genre forwarded to ``tfCalc.tfGenreTag``.
    :param idfGenVector: mapping indexed by tag; it must contain every tag
        present in the genre's TF vector, otherwise the lookup fails.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> tf * idf mapping.
    """
    tf = tfCalc.tfGenreTag(genre)
    weighted = {
        tag: tf[tag] * idfGenVector[tag]
        for tag in tf.keys()
    }
    ranked = utils.sortByValue(weighted)
    return utils.normalizeVector(ranked)
Exemplo n.º 4
0
def tfIdfUserTag(userId, idfUserVector):
    """Combine a user's tag TF vector with a precomputed IDF vector.

    :param userId: identifier forwarded to ``tfCalc.tfUserTag``.
    :param idfUserVector: mapping indexed by tag; it must contain every tag
        present in the user's TF vector, otherwise the lookup fails.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> tf * idf mapping.
    """
    tf = tfCalc.tfUserTag(userId)
    weighted = {
        tag: tf[tag] * idfUserVector[tag]
        for tag in tf.keys()
    }
    ranked = utils.sortByValue(weighted)
    return utils.normalizeVector(ranked)


#idfVect = idfMovieTag()
#tfIdfVect = tfIdfMovieTag('3189', idfVect)
#print((tfIdfVect))
Exemplo n.º 5
0
def tfActorTag(actorId):
    """Compute the rank-weighted tag term-frequency vector for one actor.

    Each row from ``di.getActorMovies(actorId)`` supplies a movie id (first
    element) and the actor's rank in that movie (second element).  Every
    movie is loaded with the (tag id, timestamp) rows from
    ``di.getMovieTags``.  For each unique tag reported by the actor object,
    the score is the sum over the actor's movies of

        rank weight * (sum of time weights of that tag's occurrences)
        / (tags on the movie)

    Movies that carry no tags contribute nothing.

    :param actorId: identifier handed to ``di.getActorMovies`` and ``Actor``.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> score mapping.
    """
    movieRows = di.getActorMovies(actorId)
    actor = Actor(actorId)
    for movieRow in movieRows:
        filmId, rank = movieRow[0], movieRow[1]
        film = Movie(filmId, rank)
        for tagRow in di.getMovieTags(filmId):
            film.addTag(tagRow[0], tagRow[1])
        actor.addMovie(film)

    actor.setUnqTags()
    scores = {}
    for tagId in actor.getUnqTags():
        tagScore = 0
        for film in actor.getMovies():
            filmTags = list(film.getTags())
            rankWeight = film.getRWeight()
            tagCount = len(filmTags)
            matched = sum(t.getTimeWeight() for t in filmTags
                          if t.getId() == tagId)
            # Some movies have no tags at all; skip them to avoid dividing by 0.
            if tagCount:
                tagScore = tagScore + (rankWeight * matched) / tagCount
        scores[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(scores))
Exemplo n.º 6
0
def tfGenreTag(genre):
    """Compute the tag term-frequency vector for one genre.

    The genre object comes from ``createGenObj(genre)``.  For each unique tag
    it reports, the score is the sum over the genre's movies of

        (sum of time weights of that tag's occurrences) / (tags on the movie)

    Movies that carry no tags contribute nothing.

    :param genre: genre forwarded to ``createGenObj``.
    :return: the result of ``utils.normalizeVector`` applied to the
        ``utils.sortByValue`` output for the tag -> score mapping.
    """
    genObj = createGenObj(genre)
    uniqueTagIds = genObj.getUnqTags()
    scores = {}
    for tagId in uniqueTagIds:
        tagScore = 0
        for film in genObj.getMovies():
            filmTags = list(film.getTags())
            tagCount = len(filmTags)
            matched = sum(t.getTimeWeight() for t in filmTags
                          if t.getId() == tagId)
            if tagCount:
                tagScore = tagScore + matched / tagCount
        scores[tagId] = tagScore

    return utils.normalizeVector(utils.sortByValue(scores))
Exemplo n.º 7
0
    def getVals(cls, tweet):
        """Build the TF-IDF vector for ``tweet`` and update the running stats.

        State read and written on ``cls``:
          * ``uniq_words``  -- word -> number of tweets seen so far containing it
          * ``pos_map``     -- word -> vector position, assigned on first sight
          * ``pos_in_map``  -- next free vector position
          * ``num_prev_tweet`` -- number of tweets processed, including this one
          * ``idfMap``      -- word -> most recently stored IDF score

        For a word that already has an entry in ``idfMap`` the vector uses the
        score stored *before* this call, and the stored score is then
        refreshed to ``log10(num_prev_tweet / (tweets containing word + 1))``.
        A word without an entry gets ``log10(num_prev_tweet)``, which is also
        stored.

        The resulting position -> tf * idf mapping is passed through
        ``utils.normalizeVector`` and attached with ``tweet.setVector``.

        :param tweet: object exposing ``getTokens()`` and ``setVector(vec)``.
        :return: how many previously unseen words this tweet added to
            ``cls.uniq_words``.
        """
        before = len(cls.uniq_words)
        tfMap = {}  # term -> number of occurrences inside this tweet
        for w in tweet.getTokens():
            if w in tfMap:
                tfMap[w] += 1
                continue
            # First time the word comes up in this tweet: start its term
            # count and bump its per-tweet document frequency exactly once.
            tfMap[w] = 1
            if w in cls.uniq_words:
                cls.uniq_words[w] += 1
            else:
                # Word never seen in any tweet: reserve a vector position.
                cls.uniq_words[w] = 1
                cls.pos_map[w] = cls.pos_in_map
                cls.pos_in_map += 1

        cls.num_prev_tweet += 1
        size_increase = len(cls.uniq_words) - before
        vec = {}

        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for k, v in tfMap.items():
            if k in cls.idfMap:
                idf_score = cls.idfMap[k]  # use the previously stored score
                # Force true division: with Python 2 integer operands the
                # ratio truncates to 0 once a word has appeared in every
                # tweet, and log10(0) raises ValueError.
                ratio = float(cls.num_prev_tweet) / (cls.uniq_words[k] + 1)
                cls.idfMap[k] = math.log10(ratio)
            else:
                idf_score = math.log10(cls.num_prev_tweet)
                cls.idfMap[k] = idf_score

            vec[cls.pos_map[k]] = v * idf_score

        vec = utils.normalizeVector(vec)

        tweet.setVector(vec)
        return size_increase