def main(gamename, seed, algo, task_id):
    env = gym.make(gamename)
    print("start running task", task_id)
    t0 = time.time()

    # set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim, action_dim, action_lim = check_env(env)
    replay_buffer = Data(args.buffersize)

    if algo == 'ddpg':
        trainer = DDPGTrainer(state_dim, action_dim, action_lim, replay_buffer)
    elif algo == 'td3':
        trainer = TD3Trainer(state_dim, action_dim, action_lim, replay_buffer)
    else:
        print("unknown algo:", algo)
        return

    frame_count = 0
    timestep_since_eval = 0
    reward_list = []
    evaluations = []

    for episode in range(args.max_episode):
        trainer.init_episode()
        obs = env.reset()
        obs = torch.from_numpy(obs).reshape(1, state_dim).float()
        reward_episode = 0
        actor_loss_l = []
        critic_loss_l = []

        for i in range(args.T):
            # act randomly until enough start-up frames have been collected
            if frame_count < args.start_timesteps:
                action = env.action_space.sample()
            else:
                action = trainer.get_exploration_action(obs)

            new_obs, r, done, _ = env.step(action)
            new_obs = torch.from_numpy(new_obs).reshape(1, state_dim).float()
            reward_episode += r

            # store the step reward r (not the accumulated episode reward) in the buffer
            sequence = [
                obs,
                torch.from_numpy(action).reshape(1, action_dim).float(),
                r,
                new_obs,
                0 if done else 1
            ]
            replay_buffer.push(sequence)

            obs = new_obs
            frame_count += 1
            timestep_since_eval += 1
            if done:
                break

        # train once per episode, for as many iterations as steps taken
        trainer.optimize(i)
        reward_list.append(reward_episode)

        if timestep_since_eval > args.eval_freq:
            timestep_since_eval %= args.eval_freq
            evaluations.append(evaluate_policy(env, trainer))
            trainer.save_model(gamename, evaluations, seed)

        if frame_count > args.Tmax:
            break

    trainer.save_model(gamename, evaluations, seed)
    return "Over"
def execute(self, p_conn):
    debug_on_event = p_conn.get_debug_on_event()
    data = Data()
    data.set_data("debug_on_event", debug_on_event)
    self.emit_("debug_on_event_update_request", data)
def get_movies_with_similar_genres(movie_id: int, n: int = 5,
                                   popularity_bias: bool = False,
                                   user_bias: bool = False,
                                   movies: pd.DataFrame = None):
    if n is None:
        n = 5

    # Get all movies and split them into the base movie and the rest.
    # Use the preferred movie df if one was passed in.
    if movies is None:
        all_movies = Data.movie_meta()[Column.genres.value]
    else:
        all_movies = movies[Column.genres.value]

    # get the base movie's genres out of the df and remove it from the rest
    base_genres = eval(all_movies.loc[movie_id])
    all_movies = all_movies.drop(movie_id)

    # count genres shared with the base movie
    all_movies = all_movies.apply(
        lambda row: count_elements_in_set(row, base_genres)
    )

    # remove all movies which have no genre in common
    filtered_movies_sum = all_movies[all_movies > 0]

    if user_bias:
        # reduce the amount of candidates to the n * 10 best movies
        top_n_mul_ten = filtered_movies_sum.nlargest(n * 10)

        ratings = Data.ratings()
        # group by movie
        ratings_grouped = ratings.groupby(str(Column.movie_id))

        # calculate mean rating and number of ratings for each movie
        # (select rating to drop the first level of the column index:
        #  before: (rating: (mean, count)), after: (mean, count))
        measures: pd.DataFrame = ratings_grouped.agg(['mean', 'count'])[str(Column.rating)]

        # merge mean, count and genre sum into one DataFrame
        measures_movies = pd.merge(measures, pd.DataFrame(top_n_mul_ten),
                                   left_index=True, right_index=True)

        if popularity_bias:
            # give more weight to the number of ratings (~popularity)
            # by raising the avg rating to some power (to preserve a notion of good vs. bad ratings)
            # and multiplying the count back in;
            # additionally multiply the genre count back in
            # to penalize well-rated movies with little correlation to the genres
            results = measures_movies.eval('(mean ** 3) * count * genres')
        else:
            # multiply the genre count in to penalize well-rated movies
            # with little correlation to the genres
            results = measures_movies.eval('mean * genres')
    else:
        results = filtered_movies_sum

    return results
def _check_dbwhere(self, p_curdbwhere):
    # FIXME: documentation
    predbwhere = self.__predbwhere
    if p_curdbwhere != predbwhere:
        data = Data()
        data.set_data("where", p_curdbwhere)
        self.emit_("where_changed", data)
    self.__predbwhere = p_curdbwhere
def _check_dbstack(self, p_curdbstack):
    # FIXME: documentation
    predbstack = self.__predbstack
    if p_curdbstack != predbstack:
        data = Data()
        data.set_data("stack", p_curdbstack)
        self.emit_("stack_changed", data)
    self.__predbstack = p_curdbstack
def execute(self, p_conn):
    # FIXME: reimplement this method using 'p_conn.is_file_in_loadpath'
    funcname = self.__funcname
    if funcname[1]:
        real = "['%s', filemarker(), '%s']" % (funcname[0], funcname[1])
    else:
        real = "'%s'" % funcname[0]

    dbstatus = p_conn.dbstatus(real)
    lines = []
    if dbstatus:
        file_ = dbstatus[0]["file"]
        if funcname[1] or file_:
            # This avoids functions defined in the CommandWindow.
            if not file_:
                file_ = p_conn.file_in_loadpath("'%s'" % self.__filename)
            if self.__file == file_:
                lines = dbstatus[0]["lines"]

    data = Data()
    data.set_data("file", self.__file)
    data.set_data("function", funcname[1] if funcname[1] else funcname[0])
    data.set_data("lines", lines)
    self.emit_("breakpoints_update_request", data)
def _check_dbstatus(self, p_curdbstatus):
    # FIXME: documentation
    # NOTE: the breakpoints may change without the signal being emitted.
    # This is not a problem, because there is no way around it in Octave 3.2.3.
    predbstatus = self.__predbstatus
    if p_curdbstatus != predbstatus:
        data = Data()
        data.set_data("breakpoints", p_curdbstatus)
        self.emit_("breakpoints_changed", data)
    self.__predbstatus = p_curdbstatus
def get_year_relevance(movie_id: int, n: int = 0):
    release_years = Data.movie_meta()[Column.release_year.value]
    movie_year = release_years.loc[movie_id]

    # absolute distance in years from the base movie
    release_years = release_years.subtract(movie_year)
    release_years = release_years.abs()
    # normalize to [0, 1] and invert: same year -> 1, farthest year -> 0
    release_years = 1 - release_years.div(release_years.max())

    return release_years.drop(movie_id)
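# Illustrative sketch (not part of the module, made-up values) of the relevance
# formula above: relevance = 1 - |year - base_year| / max_year_difference.
import pandas as pd

_years = pd.Series({1: 2000, 2: 1990, 3: 2010, 4: 1950})  # hypothetical movie_id -> release year
_diffs = _years.subtract(_years.loc[1]).abs()              # 0, 10, 10, 50
print(1 - _diffs.div(_diffs.max()))                        # 1.0, 0.8, 0.8, 0.0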
def build_index(cls):
    if cls.ix is None:
        cls.init()

    # automatically calls iw.commit()
    iw = cls.ix.writer()

    for movie_id, movie in Data.movie_meta().iterrows():
        # extract fields
        fields: Dict = {
            'movie_id': movie_id,
            'title': movie[Column.title.value],
            # 'tagline': movie[Column.tagline.value],
            # 'summary': movie[Column.summary.value],
            # 'keywords': movie[Column.keywords.value],
            # 'popularity': movie[Column.num_ratings.value],
            # 'genres': movie[Column.genres.value],
        }
        # filter empty values (inserting fails for np.nan values)
        fields = {
            key: val for key, val in fields.items()
            if val is not None and val is not np.nan and val != ''
        }
        # insert into index
        iw.update_document(**fields)

    iw.commit(optimize=True)
def get_imdb_id(movielens_id: int) -> int:
    movies = Data.movie_meta()

    if movielens_id not in movies.index:
        raise MovieNotFoundException()

    movie = movies.loc[movielens_id]
    return movie[Column.imdb_id.value]
def get_movie_meta_for(movie_ids: List[int]) -> List[Dict]:
    # if a single movie id was passed, pack it into a list
    if isinstance(movie_ids, int):
        movie_ids = [movie_ids]
    movie_ids = filter(lambda x: x is not None, movie_ids)

    meta: pd.DataFrame = Data.movie_meta()

    try:
        # filter metadata
        meta = meta.loc[movie_ids]
    except KeyError as e:
        raise MovieNotFoundException(e.args)

    # fetch metadata for the movies, convert to dictionary
    # orient='records' results in [{'col1': 'val1', 'col2': 'val2'}, {'col1': 'val1', ...}]
    meta_dict: List[Dict] = meta.to_dict(orient='records')

    for item in meta_dict:
        for col in [Column.actors, Column.genres, Column.keywords, Column.directors]:
            if not pd.isnull(item[col.value]):
                item[col.value] = eval(item[col.value])

    add_poster_urls(meta_dict)
    return meta_dict
def uuid_str():
    '''
    Return a [Base58 encoded][1] UUID that can be used to opaquely identify
    applications, sessions, and users to external clients

    [1]: https://en.wikipedia.org/wiki/Base58
    '''
    return Data(uuid4().bytes).stringWithEncoding(Base58)
def get_similarities_for(cls, movie_id: int, colname: str):
    # get similarity matrix (calculate if necessary)
    sim_matrix = cls.calculate_similarities(colname)

    # get absolute index of movie
    index = Data.movie_meta().index.get_loc(movie_id)

    # get similarities for this movie
    # use .toarray() to convert from sparse matrix
    # use [0] to convert the "matrix" with only one row to a one-dimensional array
    similarities = sim_matrix[index].toarray()[0]

    # put into a pandas Series
    # use index=... to apply the original movie index
    series = pd.Series(index=Data.movie_meta().index, data=similarities)

    return series.drop(movie_id)
def __call__(self, movie_id: int, n: int = 5):
    meta = Data.movie_meta()
    # movies that belong to the same collection as the base movie
    collection = meta[get_collection_mask(movie_id, meta)].index.values

    # over-fetch, then drop the collection members from the recommendations
    results: pd.Series = self.method(movie_id, n + 10)
    results = results.drop(collection, errors='ignore')
    return results
def get_normalized_popularity():
    # used for popularity bias
    popularity = Data.movie_meta()[Column.num_ratings.value]

    # apply a root to reduce linearity
    # (if movie A has double the ratings of movie B, its popularity should only be slightly higher)
    popularity **= (1 / 10)

    # normalize
    popularity /= popularity.max()
    return popularity
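# Quick sanity check (illustrative only) of the tenth-root damping used above:
# doubling the number of ratings raises the un-normalized popularity by only ~7%,
# and ten times the ratings by only ~26%.
print(2 ** (1 / 10))   # ~1.072
print(10 ** (1 / 10))  # ~1.259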
def avg_rating_for_user(user_id: int) -> float:
    """
    Calculates the average score for ratings from a specified user.
    """
    ratings: pd.Series = Data.ratings_as_series()

    # check if user_id exists and raise an exception if it does not
    if user_id not in ratings:
        raise UserNotFoundException()

    # select ratings from the specified user_id
    user_ratings: pd.Series = ratings.loc[user_id]

    # calculate the average using the built-in function
    return user_ratings.mean()
def calculate_similarities(cls, colname: str, overwrite_existing: bool = False):
    if colname not in cls.similarity_matrices or overwrite_existing:
        # calculate tf-idf for the column
        tfidf_matrix = cls.tf_idf.fit_transform(
            Data.movie_meta()[colname].fillna(''))

        # calculate similarities between movies
        # use dense_output=False (results in a sparse matrix) to reduce memory usage
        cls.similarity_matrices[colname] = linear_kernel(
            tfidf_matrix, tfidf_matrix, dense_output=False)

    return cls.similarity_matrices[colname]
def get_movielens_id(tmdb_id: int = None, imdb_id: int = None) -> int:
    movies: pd.DataFrame = Data.movie_meta()

    if tmdb_id is not None:
        movie = movies.query(f'{Column.tmdb_id.value} == {tmdb_id}')
    elif imdb_id is not None:
        movie = movies.query(f'{Column.imdb_id.value} == {imdb_id}')
    else:
        return None

    if movie.empty:
        return None

    return movie.index[0]
def tmdb_reference(movie_id: int, n: int = 5):
    movie = Data.movie_meta().loc[movie_id]

    # get the list of similar movies from its string representation
    similar_tmdb = eval(movie[Column.tmdb_similar.value])

    # get the movielens id for each tmdb_id
    similar = map(lambda tmdb_id: get_movielens_id(tmdb_id=tmdb_id), similar_tmdb)

    # return with an artificial decreasing score
    return pd.Series({
        item: -index
        for index, item in enumerate(similar)
        if item is not None
    })
def recommend_movies(movie_id: int, n: int = 5,
                     filter_below_avg_ratings: bool = False,
                     popularity_bias: bool = False) -> List[int]:
    ratings = Data.ratings()

    # first get the ratings for the base movie
    ratings_of_base_movie = ratings.query('movie_id == %s' % movie_id)

    # check if there are reviews for this movie
    if ratings_of_base_movie.empty:
        raise MissingDataException('no ratings for movie_id %s' % movie_id)

    if filter_below_avg_ratings:
        # of those, select the above-average ratings
        avg_rating = ratings_of_base_movie['rating'].mean()
        # query is actually faster than the python subscription syntax ( users[users['rating'] >= avg] )
        ratings_of_base_movie = ratings_of_base_movie.query('rating >= %f' % avg_rating)

    # to get ratings from all the users that have rated/liked the base movie,
    # perform a (left outer) join on all the ratings on user_id
    relevant_movies = ratings_of_base_movie.join(ratings, on='user_id', lsuffix='_L')

    # remove the columns that were duplicated as a result of the join
    relevant_movies = relevant_movies[['movie_id', 'rating']]

    # remove the base movie from the results
    relevant_movies = relevant_movies.query('movie_id != %s' % movie_id)

    if relevant_movies.empty:
        raise MissingDataException(
            'no other ratings from users that rated movie_id %s' % movie_id)

    # group by movie
    relevant_movie_groups = relevant_movies.groupby('movie_id')

    # calculate mean rating and number of ratings for each movie
    # (select rating to drop the first level of the column index:
    #  before: (rating: (mean, count)), after: (mean, count))
    measures: pd.DataFrame = relevant_movie_groups.agg(['mean', 'count'])['rating']

    if popularity_bias:
        # give more weight to the number of ratings (~popularity)
        # by raising the avg rating to some power (to preserve some notion of good vs. bad ratings)
        # and multiplying the count back in
        results = measures.eval('(mean ** 3) * count')
    else:
        results = measures['mean']

    return results
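# Made-up numbers illustrating the popularity-biased score '(mean ** 3) * count' above:
# cubing the mean keeps a notion of good vs. bad ratings, while the count rewards
# movies that many of the relevant users have rated.
for _name, _mean, _count in [('A', 4.5, 200), ('B', 3.0, 800), ('C', 4.5, 900)]:
    print(_name, (_mean ** 3) * _count)   # A: 18225.0, B: 21600.0, C: 82012.5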
def _recommend_movies(movie_id: int, n: int, method: Method) -> List[Dict]:
    if movie_id not in Data.movie_meta().index:
        raise MovieNotFoundException

    # start with the movie itself
    movies: List[int] = [movie_id]

    # calculate similarities
    scores: Series = method(movie_id)
    # and filter out any movies that were recommended recently
    scores = History.filter(scores)

    if method == Method.reference or method == Method.sequels:
        n = 20

    # movies = [base_movie, ...recommendations]
    movies.extend(scores.nlargest(n).index)

    # add the recommended movies to the history
    History.append(movies)

    return get_movie_meta_for(movies)
def search(cls, query_text: str, n: int, add_posters: bool = True):
    # this method applies a popularity bias to the search results;
    # as they need to be re-sorted, more results are fetched than necessary
    # so that popular results with rather low scores can be recovered
    results = cls._search(query_text, n + 25)

    # encapsulate in a pandas Series for further operations
    scores = pd.Series(results, name='score')

    # perform a (right outer) join to connect the search results to the metadata
    df = Data.movie_meta().join(scores, how='right')

    # calculate the weighted score: raise the score to some power
    # so that the popularity does not overpower the score completely,
    # then multiply by the number of ratings (the popularity)
    df.eval(f'weighted = score**16 * {Column.num_ratings.value}', inplace=True)

    # extract the n best results and export as a list
    movie_ids = list(df.nlargest(n, 'weighted').index)

    # fetch metadata
    meta = get_movie_meta_for(movie_ids)
    return meta
def get_collection(movie_id: int, df: pd.DataFrame = None,
                   include_base_movie: bool = True,
                   start_from_base_movie: bool = False,
                   wrap_to_start: bool = False) -> pd.DataFrame:
    """
    Get movies from a collection.

    :param movie_id: a movie that is in a collection
    :param df: the pandas DataFrame to search
    :param include_base_movie: whether to include movie_id itself in the result
    :param start_from_base_movie: whether to split the result and start at movie_id
    :param wrap_to_start: if start_from_base_movie: at the end of the collection,
        wrap over to the start and include the prequels
    :return: a DataFrame containing the movies in the collection
    """
    if df is None:
        df = Data.movie_meta()

    # select movies that are in the collection
    m = df[get_collection_mask(movie_id, df)]

    # sort by release date
    m = m.sort_values(by=Column.release_date.value)

    if not include_base_movie:
        m = m.drop(movie_id)

    if start_from_base_movie:
        # split the dataframe at the base movie
        sequels = m.loc[movie_id:]
        prequels = m.loc[:movie_id - 1]

        if wrap_to_start:
            # wrap around: append the prequels after the sequels
            m = pd.concat([sequels, prequels])
        else:
            # just return the movies starting with the base movie
            m = sequels

    return m
def __init__(self, p_mwindow):
    """
    p_mwindow: a 'MainWindow'.

    Returns: a 'Connection'.

    Creates a new 'Connection'.
    """
    threading.Thread.__init__(self)
    gobject.GObject.__init__(self)

    self.__mwindow = p_mwindow
    self.__continue = True
    self.__term = Terminal(self)

    # The commands the user sends to Octave are stored here.
    self.__tail = []
    # The commands to be executed once Octave is ready are stored here.
    self.__tail_prio = TailWithPriority()

    self.__dbstatus_code = self.__get_dbstatus_code()
    self.__dbstack_code = self.__get_dbstack_code()

    self.__prestate = None
    self.__predbstack = None
    self.__predbwhere = None
    self.__predbstatus = None
    self.__predir = None
    # self.__prevars = None  # to emit a "vars_changed"

    flag = Data()
    key = "key"
    self.connect("changed", self.__on_changed, flag, key)
    self.connect("stack_changed", self.__on_stack_changed, flag, key)
    self.connect("where_changed", self.__on_where_changed, flag, key)
def actors_as_lists():
    # since eval (converting the string representation to an object) is costly time-wise,
    # cache the results
    return Data.movie_meta()[Column.actors.value].map(eval)
def execute(self, p_conn):
    if "is patch for Octave-3.2.3":
        dbwhere = p_conn.dbwhere() if p_conn.dbstack()["frames"] else {}
        # dbwhere = p_conn.dbwhere()  # May cause a beep!
        file_ = self.__file
        positions = []
        current = False
        if dbwhere and dbwhere["file"] == file_:
            pos = (dbwhere["line"], dbwhere["column"])
            if None not in pos:
                positions.append(pos)
            current = True
        data = Data()
        data.set_data("positions", positions)
        data.set_data("current", current)
        self.emit_("stack_update_request", data)
        return

    file_ = self.__file
    omit = self.__omit
    omit = str(omit) if omit else ""
    dbstack = p_conn.dbstack(omit)

    positions = []
    frames = dbstack["frames"]
    current = bool(frames and frames[0]["file"] == file_)
    for frame in frames:
        if frame["file"] == file_:
            positions.append((frame["line"], frame["column"]))

    data = Data()
    data.set_data("positions", positions)
    data.set_data("current", current)
    self.emit_("stack_update_request", data)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

if len(sys.argv) >= 2:
    dataIdx = '{:0>2}'.format(sys.argv[1])
else:
    dataIdx = 'CoNLL'

trainPath = 'data/normal/en_train_{}.txt'.format(dataIdx)
testPath = 'data/normal/en_test_CoNLL.txt'

data = Data(inputPathList=[trainPath], testPath=testPath)
return_data = data.loadCoNLL(trainPath, loadFeatures=True)

# train_test_split returns a (train, val) pair for every input array:
# even indices hold the train splits, odd indices the validation splits,
# and the last pair holds the labels
split_data = train_test_split(*return_data, test_size=0.1, random_state=0)
X_train = split_data[:-2:2]
X_val = split_data[1:-2:2]
y_train, y_val = split_data[-2:]

modelWrapper = BiLSTMCRF(data)
model = modelWrapper.buildModel(feature2idx=data.feature2idx)

history = metricHistory(X_val, y_val, saveDir=dataIdx)
history.set_model(model)

model.fit(X_train, y_train, epochs=50,
from util.plot import Plot, multiplot
from util.stats import cdf_fit_func
import numpy as np
from util.data import Data

# d = Data.load("knn_results_[yelp-mnist].pkl")
d = Data.load("prediction_results.pkl")

names = d.names.copy()
names = [names[0], names[6], names[1]] + names[2:6] + names[7:]
d.reorder([names[0], names[6], names[1]])
d.sort()

# cap individual errors at 5, then derive summary statistics per row
d["Errors"] = ([float(v) if (v <= 5) else 5.0 for v in e] for e in d["Errors"])
d["Mean Error"] = (sum(e) / len(e) for e in d["Errors"])
d["Mean Squared Error"] = (sum(v**2 for v in e) / len(e) for e in d["Errors"])
d["Error Variance"] = (float(np.var(e)) for e in d["Errors"])

d._max_display = 1000
print(d)
all_data = d

# Get the unique dimensions, algorithms, and data sets.
dims = sorted(set(all_data["Dimension"]))
algs = sorted(set(all_data["Algorithm"]))
data_sets = sorted(set(all_data["Data"]))

for ds in data_sets:
    for alg in algs:
        d = all_data[all_data["Data"] == ds]
        d = d[d["Algorithm"] == alg]
        min_index = int(np.argmin(d["Mean Error"]))
        m = d[min_index, "Method"]
def directors_as_lists():
    # convert the string representation of each director list to an actual list
    return Data.movie_meta()[Column.directors.value].map(eval)
def get_genre_as_lists():
    # convert the string representation of each genre list to an actual list
    return Data.movie_meta()[Column.genres.value].map(eval)