def competition(self, movie):
    """Return the major competitors released around the same time as ``movie``.

    A record in ``self.data2`` with a different title counts as a competitor
    when it was released

    * on exactly the same date (equal ``[4][3]`` fields), or
    * in the same year and at most two weeks apart, using the week numbers
      produced by ``utilities.findweek``, or
    * just across a New Year boundary (see the wrap-around checks below).

    Only competitors whose ``[1][0]`` figure is at least 100,000,000 are
    returned.

    Record layout as used here (presumably shared by every entry of
    ``self.data2`` -- confirm against the loader): ``[0]`` title, ``[1][0]``
    gross, ``[4]`` release info as ``[year, month, day, date]``.

    Args:
        movie: the film record to find competitors for.

    Returns:
        A list of records from ``self.data2``; empty when the film had no
        major competitor.
    """
    date = movie[4][3]
    year = movie[4][0]
    week = utilities.findweek(movie[4][0], movie[4][1], movie[4][2])
    title = movie[0]

    major = []
    for entry in self.data2:
        # Week of release of the candidate film.
        w = utilities.findweek(entry[4][0], entry[4][1], entry[4][2])
        if title == entry[0]:
            # The film does not compete with itself.
            continue

        # Years are compared through [4][0], the same field that is handed to
        # findweek as the year; [4][3] holds the full date and is only
        # meaningful for the exact-date check.
        entry_year = entry[4][0]
        released_nearby = (
            # Released on exactly the same date.
            date == entry[4][3]
            # Same year, at most two weeks apart.
            or (year == entry_year and w - 2 <= week <= w + 2)
            # Current film at the very end of a year, candidate at the start
            # of the next one.
            or (week >= 51 and year == entry_year - 1
                and w <= (week + 2) % 53)
            # Current film at the very start of a year, candidate at the end
            # of the previous one. Week 2 is left out on purpose:
            # (2 - 2) % 53 == 0 would accept every week of the previous year.
            or (week < 2 and year == entry_year + 1
                and w >= (week - 2) % 53)
        )

        # Only films that were (or are projected to be) successful count as
        # major competitors.
        if released_nearby and entry[1][0] >= 100000000:
            major.append(entry)

    return major
def weekrate(self, n, b):
    """Summarise how films released in week ``n`` of the year performed.

    Films in ``self.data2`` whose release week (per ``utilities.findweek``)
    equals ``n`` are narrowed by ``utilities.closest_matches`` to those with
    the budgets most similar to ``b``; the summary is computed over that
    narrowed set.

    Args:
        n: week of the year to look at.
        b: budget used to pick comparable films.

    Returns:
        ``None`` when no film was released in week ``n``; otherwise
        ``[tier, ratio]`` where ``tier`` is the most common revenue tier
        (``[1][2]``) and ``ratio`` is summed gross (``[1][0]``) divided by
        summed budget (``[1][1]``).

    Note:
        A summed budget of zero raises ``ZeroDivisionError``.
    """
    # Every film released on the requested week of the year.
    same_week = [
        film for film in self.data2
        if utilities.findweek(film[4][0], film[4][1], film[4][2]) == n
    ]
    if not same_week:
        return None

    # Narrow the matches down to the films with the most similar budgets.
    comparable = utilities.closest_matches(same_week, b)

    # Single pass: the helper's return type is not visible here, so do not
    # assume it can be iterated more than once.
    gross_sum = 0
    budget_sum = 0
    tiers = []
    for film in comparable:
        gross_sum += film[1][0]
        budget_sum += film[1][1]
        tiers.append(film[1][2])

    # Average success ratio over the comparable films.
    ratio = float(gross_sum) / float(budget_sum)

    # The projected tier is simply the tier most films fell into.
    top_tier = Counter(tiers).most_common(1)[0][0]
    return [top_tier, ratio]
def predict(self, title):
    """Predict the revenue tier of a film and collect the reasons behind it.

    The film is looked up with ``self.find_film``. Every trend helper that
    yields a result (release week, lead actor, director, genre, "other"
    trends, most similar film) casts one revenue-tier vote; the tier with the
    most votes becomes the prediction and ties go to the highest tier. The
    prediction is then raised to the prequel's tier, or to the tier of the
    director's latest hit, when those are higher, and bumped by one tier when
    the film looks like the end of a trilogy.

    ``pos_factors`` and ``neg_factors`` are flat lists of
    ``flag, headline, explanation`` triples. They are kept the same length:
    whenever one list receives a real entry, the other receives the
    placeholder ``0, "", ""``.

    Record layout as used here (presumably the layout of ``self.data2``
    entries -- confirm against the loader): ``[0]`` title, ``[1][0]`` gross,
    ``[1][1]`` budget, ``[1][2]`` revenue tier, ``[2][0]`` lead actor,
    ``[2][4]`` director, ``[3][0]`` genre, ``[3][1]`` source material,
    ``[3][2]`` creative type, ``[3][3]`` production method, ``[3][4]`` sequel
    flag, ``[4]`` release ``[year, month, day, ...]``.

    NOTE(review): the result of ``self.find_film`` is used without checking
    that a film was actually found.

    Args:
        title: title of the film to predict.

    Returns:
        ``[movie, prediction, pos_factors, neg_factors]``. The original
        comment states the factor lists are formatted for the web site's bar
        charts.
    """
    movie = self.find_film(title)
    name = movie[0]

    pos_factors = []   # positive factors, as flat triples
    neg_factors = []   # negative factors, as flat triples
    final = []         # one revenue-tier vote per trend that had a result
    latest = None      # director's recent hit, when one is noted
    prequel = None     # result of find_prequel for the current film
    trilogy_boost = 0  # 1 if the film is the end of a trilogy

    def note_positive(headline, explanation, flag=1):
        # Record a positive factor and keep neg_factors aligned with it.
        pos_factors.extend([flag, headline, explanation])
        neg_factors.extend([0, "", ""])

    def note_negative(headline, explanation):
        # Record a negative factor and keep pos_factors aligned with it.
        neg_factors.extend([1, headline, explanation])
        pos_factors.extend([0, "", ""])

    # ---- prequel -----------------------------------------------------------
    if movie[3][4] == 1:
        prequel = self.find_prequel(movie)

    if prequel:
        prequel_gross = prequel[0][1][0]
        if prequel_gross > 100000000:
            note_positive(
                "Successful Prequel",
                "The success of " + name + " was boosted by the success of it's prequel, " + prequel[0][0] + ".")
        if prequel_gross < 50000000:
            note_negative(
                "Unsuccessful Prequel: " + prequel[0][0],
                "Given the lack of success garnered by " + name + "'s prequel, " + prequel[0][0] + ", the film suffered from lowered expectations.")

        # Hints that the current film closes a trilogy: the length of
        # prequel[1], or a "3"/"6"-style marker in the title.
        ends_trilogy = (
            len(prequel[1]) == 2 or len(prequel[1]) == 5
            or "III" in name or "VI" in name
            or "3:" in name or "6:" in name
            or name[-1] in ("3", "6")
        )
        if ends_trilogy:
            # NOTE(review): this is the only positive factor recorded with
            # flag 0 instead of 1; kept as found -- confirm it is intended.
            note_positive(
                "End of Trilogy",
                "As the end of a trilogy, " + name + " enjoyed higher audience anticipation, driving theater attendance.",
                flag=0)
            trilogy_boost = 1

        # A prequel released less than three years earlier helps; a longer
        # gap hurts.
        if prequel[0][4][0] > movie[4][0] - 3:
            note_positive(
                "Prequel Release",
                "Given the short duration since the release of " + name + "'s prequel, the film could hold the attention and anticipation of its fans until its release.")
        else:
            note_negative(
                "Prequel Release",
                "In the long time span since the release of " + name + "'s prequel, the film lost steam and anticipation, thus having less theater attendance.")

    # ---- release week ------------------------------------------------------
    weekend = self.weekrate(
        utilities.findweek(movie[4][0], movie[4][1], movie[4][2]),
        movie[1][1])
    if weekend:
        final.append(weekend[0])
        released_on = str(movie[4][1]) + "-" + str(movie[4][2]) + "-" + str(movie[4][0])
        if weekend[1] >= 2.0:
            note_positive(
                "Release Timing",
                "Theater attendances is usually higher during the time when this film was released, on " + released_on + ".")
        if weekend[1] < 1.0:
            note_negative(
                "Release Timing",
                "Theater attendances is usually much lower during the time when this film was released, on " + released_on + ".")

    # ---- lead actor --------------------------------------------------------
    a = movie[2][0]
    actor = self.actor_trends(a, movie)
    if actor:
        final.append(actor[0])

        # A recent success of the actor (within the last three years) gives a
        # boost, unless the current film is "Digital Animation".
        if (actor[2] and actor[2][2][0] >= 150000000
                and movie[3][3] != "Digital Animation"
                and actor[2][4][0] >= movie[4][0] - 3):
            note_positive(
                "Recent Actor Success",
                "This film received a boost in popularity from the recent success of " + a + " in " + actor[2][0] + ".")
            actor[0] += 1  # boost the actor tier that was returned
            # NOTE(review): the file's indentation was lost; this second vote
            # is assumed to belong to the boost branch -- confirm.
            final.append(actor[0])

        if actor[1] >= 2.0:
            note_positive(
                "Successful Actor Trends",
                "In the recent years preceding this film, actor " + a + " had increasing popularity for a variety of successful roles, leading to higher attendance for this film.")
        if actor[1] < 1.0:
            note_negative(
                "Unsuccessful Actor Trends",
                "In the recent years preceding this film, actor " + a + " had a decreasing reputation for holding numerous unsuccessful roles, leading to lower attendance for this film.")

        # Partnerships with other actors, skipping those that come from the
        # prequel/sequel series itself.
        # NOTE(review): as written, partnerships are only noted when a prequel
        # exists; kept as found -- confirm against intent.
        for x in actor[3]:
            if prequel and x[1] not in prequel[1]:
                note_positive(
                    "Actor Partnership",
                    a + " and " + x[0] + " were popular for collaborating on numerous successful films, driving high audience expectations.")
        # Same for partnerships with directors.
        for x in actor[4]:
            if prequel and x[1] not in prequel[1]:
                note_positive(
                    "Popular Director Partnership with " + x[0],
                    a + " has worked on numerous films with director " + x[0] + ", and the two became known for popular and successful films, leading to higher attendance for this film.")

    # ---- director ----------------------------------------------------------
    director = self.director_trends(movie)
    if director:
        final.append(director[0])
        if director[1] >= 3.0 or director[3]:
            note_positive(
                "Director Quality",
                "In the recent years preceding this film, " + movie[2][4] + " gained a reputation for directing high quality films, boosting attendance for " + name + ".")
        if director[1] < 1.0:
            note_negative(
                "Director Quality",
                "In the years before this films, " + movie[2][4] + " lacked a reputation for making high quality films, which discouraged audience members from attending.")
        # The director's latest film was a hit and is not this film's prequel.
        # NOTE(review): as written this also requires a prequel to exist;
        # kept as found -- confirm against intent.
        if director[2][1][0] >= 200000000 and prequel and director[2] != prequel[0]:
            note_positive(
                "Recent Directoral Success: " + director[2][0],
                name + " received a significant boost from director " + movie[2][4] + "'s box office hit " + director[2][0])
            # NOTE(review): assumed to sit inside this branch (indentation
            # was lost) -- confirm.
            latest = director[2]

    # ---- genre -------------------------------------------------------------
    genre = self.genre_trends(movie)
    if genre:
        final.append(genre[0])
        if genre[1] >= 3.0:
            note_positive(
                "Popular Genre",
                "Before the release of " + name + ", " + movie[3][0] + " was a popular genre, which boosted the film's popularity.")
        if genre[1] < 1.0:
            note_negative(
                "Unpopular Genre: " + movie[3][0],
                movie[3][0] + " was not particularly popular in the time before " + name + "'s release, which hurt box office sales.")

    # ---- other trends and most similar film (votes only, no notes) ----------
    other = self.other_trends(movie)
    if other:
        final.append(other)

    similar = self.find_similar(movie)
    if similar:
        final.append(similar[1][2])

    # ---- competition -------------------------------------------------------
    for film in self.competition(movie):
        note_negative(
            "Competition",
            "During " + name + "'s time in theaters, it suffered in ticket sales because of competition from the successful film " + film[0] + ", which was in theaters at the same time as " + name + ".")

    # ---- looser, guess-like factors -----------------------------------------
    # Only noted for films grossing above 150,000,000.
    # NOTE(review): every check below is assumed to be nested under this
    # gross threshold (indentation was lost) -- confirm.
    if movie[1][0] > 150000000:
        source = movie[3][1]
        if source == "Based on Real Life Events":
            note_positive(
                "Real Life Event",
                "Because " + name + " was based on a popular real life event, audience members were even more likely to attend.")
        if source == "Based on Fiction Book/Short Story":
            note_positive(
                "Based on Book",
                "Because " + name + " was based on a popular book, avid fans helped spread anticipation and popularity of the film.")
        if source == "Based on Comic/Graphic Novel":
            if movie[3][2] == "Super Hero":
                note_positive(
                    "Super Hero",
                    "Since " + name + " was an adapation of a popular Super Hero, fans of all ages filled the theaters.")
            else:
                note_positive(
                    "Based on Novel",
                    "Because " + name + " was based on a popular book, avid fans helped spread anticipation and popularity of the film.")
        if movie[3][2] == "Kids Fiction" and movie[3][3] == "Digital Animation":
            note_positive(
                "Young Audience",
                "Since " + name + " was created for a younger audience, both children and parents filled the theaters, driving ticket sales.")
        if movie[3][3] == "Animation/Live Action":
            note_positive(
                "Animation",
                "The high quality animation of " + name + " boosted attendance as people flocked to see the stunning visual effects.")

    # ---- combine the votes -------------------------------------------------
    if not final:
        prediction = 0
    else:
        votes = Counter(final)
        top_count = max(votes.values())
        # Several tiers can share the top count; the highest of them wins.
        # Picked explicitly so the result does not depend on the order in
        # which Counter happens to list tied entries.
        prediction = max(tier for tier, count in votes.items()
                         if count == top_count)

    # Most sequels do as well as or better than their predecessors, so the
    # prequel's tier is a floor for the prediction.
    # NOTE(review): these overrides are assumed to apply even when no vote
    # was cast (indentation was lost) -- confirm.
    if prequel and prequel[0][1][2] >= prediction:
        prediction = prequel[0][1][2]
    # Likewise for the director's latest hit, when one was noted.
    if latest and latest[1][2] > prediction:
        prediction = latest[1][2]

    # The end of a trilogy bumps the prediction up one tier.
    prediction = prediction + trilogy_boost

    return [movie, prediction, pos_factors, neg_factors]