def group2_item_rating_predicate(movies_df, fold='0', phase='eval'):
    """Write the group2_item_rating(G) target atoms, one per movie, with no value."""
    targets = pd.DataFrame(index=movies_df.index)
    # Selecting zero columns writes only the index (movie ids) as targets.
    write(targets.loc[:, []], 'group2_item_rating_targets', fold, phase)
def capture_pattern(device=None, backend=None):
    """Replay the module-level ``setup_cmds`` script on the instrument.

    Each non-empty line is sent in order; lines ending in '?' are issued
    as queries, all others as plain writes.
    """
    inst = get_instrument(device, backend=backend)
    for command in setup_cmds.strip().split('\n'):
        if command.endswith('?'):
            query(inst, command)
        else:
            write(inst, command)
def sim_items_predicate(observed_ratings_df, movies, fold='0', phase='eval'):
    """Write sim_items_obs: for each rated movie, a block of its 50 most
    cosine-similar movies (similarity computed from observed ratings only).

    Each (movie, neighbor) pair is written with value 1.
    """
    print("Item Similarity Predicate")
    similarity = query_relevance_cosine_similarity(
        observed_ratings_df.loc[:, ['rating']].reset_index(),
        'movieId', 'userId')

    # Top 50 neighbors per movie define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=movies, columns=range(50))
    for movie_id in observed_ratings_df.reset_index().movieId.unique():
        top_neighbors.loc[movie_id, :] = similarity.loc[movie_id].nlargest(50).index

    # Movies never rated by any user have no similarity row; drop them.
    top_neighbors = top_neighbors.dropna(axis=0)

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 50)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_items_obs', fold, phase)
def group_1(user_df, fold='0', phase='eval'):
    """Write group_1(U) observations: value 1 for every user with gender 'F'."""
    members = user_df.loc[:, ['gender']]
    members.loc[:, 'value'] = 1
    write(members[members.gender == 'F'].value, 'group_1_obs', fold, phase)
def main(args):
    """Run semantic analysis on the input file, report results, and exit.

    Exits 0 on success, 1 on failure; the symbol table is written to the
    output file on success and on semantic-phase failures.
    """
    files = helpers.parse_flags(args)
    analyzer = Analyzer(helpers.read(files["input"]))
    succeeded = analyzer.try_analyze()

    # Warnings are reported regardless of outcome.
    for warning in analyzer.warnings:
        print(warning)

    if succeeded:
        print("The file is fine, no error was found")
        print("\nCheck the", files["output"], "file for more information.")
        helpers.write(files["output"], analyzer.symbols)
        sys.exit(0)

    for error in analyzer.errors:
        print(error)
    if analyzer.failedAt == "Semantic":
        print("\nCheck the", files["output"], "file for more information.")
        helpers.write(files["output"], analyzer.symbols)
    sys.exit(1)
def svd_ratings_predicate(observed_ratings_df, truth_ratings_df, fold='0', phase='eval'):
    """Write svd_rating_obs: SVD-predicted ratings for the truth (user, movie) pairs.

    Fits a surprise SVD model on the observed ratings (rating scale 0.2-1)
    and predicts a rating for every (userId, movieId) pair present in
    truth_ratings_df's index.
    """
    print("SVD predicates")
    svd_model = SVD()
    reader = Reader(rating_scale=(0.2, 1))
    train_df = observed_ratings_df.reset_index().loc[:, ['userId', 'movieId', 'rating']]
    train_dataset = Dataset.load_from_df(df=train_df, reader=reader)
    svd_model.fit(train_dataset.build_full_trainset())

    # Iterate the (userId, movieId) index directly: the old iterrows() loop
    # materialized a throwaway row Series per pair only to read its index.
    predictions = pd.DataFrame(index=truth_ratings_df.index, columns=['rating'])
    for uid, iid in truth_ratings_df.index:
        predictions.loc[(uid, iid), 'rating'] = svd_model.predict(uid, iid).est

    write(predictions, 'svd_rating_obs', fold, phase)
def sim_demo_users_predicate(user_df, fold='0', phase='eval'):
    """Write sim_demo_users_obs: for each user, a block of the 50 users most
    cosine-similar by demographics (one-hot gender/age/occupation; zip dropped).

    Each (user, neighbor) pair is written with value 1.
    """
    print("Sim demo users predicate")

    # One-hot encode demographics (age/occupation treated as categorical),
    # then row-normalize so a single matrix product yields cosine similarity.
    dummies = pd.get_dummies(
        user_df.drop('zip', axis=1).astype({'age': object, 'occupation': object})) > 0
    demo = dummies.values
    demo = demo / np.linalg.norm(demo, axis=1, keepdims=True)
    similarity = pd.DataFrame(data=np.matmul(demo, demo.T),
                              index=user_df.index,
                              columns=user_df.index)

    # Top 50 neighbors per user define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=user_df.index, columns=range(50))
    for uid in user_df.index:
        top_neighbors.loc[uid, :] = similarity.loc[uid].nlargest(50).index

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 50)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_demo_users_obs', fold, phase)
def nmf_ratings_predicate(observed_ratings_df, truth_ratings_df, fold='0', setting='eval'):
    """Write nmf_rating_obs: NMF-reconstructed ratings for the truth pairs.

    Fits a 50-component NMF on the observed user-item matrix (missing
    entries filled with 0.5), reconstructs it, realigns to the truth
    matrix's users/items (again filling misses with 0.5), and clips
    predictions to [0, 1].
    """
    nmf_model = NMF(n_components=50)
    observed_matrix = observed_ratings_df.loc[:, 'rating'].unstack(fill_value=0.5)
    truth_matrix = truth_ratings_df.loc[:, 'rating'].unstack()

    # Reconstruct the observed matrix from its low-rank factorization.
    factors = nmf_model.fit_transform(observed_matrix)
    reconstructed = pd.DataFrame(nmf_model.inverse_transform(factors),
                                 index=observed_matrix.index,
                                 columns=observed_matrix.columns)

    # Align to the truth users/items; pairs unseen in training default to 0.5.
    predictions = reconstructed.reindex(truth_matrix.index,
                                        columns=truth_matrix.columns,
                                        fill_value=0.5).stack()
    write(predictions.clip(0, 1), 'nmf_rating_obs', fold, setting)
def is_genre_predicate(movies_df, fold='0', phase='eval'):
    """Write is_genre(M, G) observations from the movie-genre indicator columns."""
    genre_indicators = movies_df.drop('movie title', axis=1).stack()
    write(genre_indicators, 'is_genre_obs', fold, phase)
def sim_users_predicate(observed_ratings_df, users, fold='0', phase='eval'):
    """Write sim_users_obs: for each user with ratings, a block of the 50
    most cosine-similar users (similarity from observed ratings only).

    Each (user, neighbor) pair is written with value 1.
    """
    print("User Similarity Predicate")
    similarity = query_relevance_cosine_similarity(
        observed_ratings_df.loc[:, ['rating']].reset_index(),
        'userId', 'movieId')

    # Top 50 neighbors per user define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=users, columns=range(50))
    for uid in observed_ratings_df.index.get_level_values(0).unique():
        top_neighbors.loc[uid, :] = similarity.loc[uid].nlargest(50).index

    # Users sharing no rated movie with anyone have no similarity row; drop.
    top_neighbors = top_neighbors.dropna(axis=0)

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 50)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_users_obs', fold, phase)
def group1_avg_rating_predicate(fold='0', phase='eval'):
    """Write the single group1_avg_rating target atom (constant index 1, no value)."""
    target = pd.DataFrame(index=[1])
    write(target.loc[:, []], 'group1_avg_rating_targets', fold, phase)
def sim_items_predicate(observed_ratings_df, truth_ratings_df, movies, fold='0', setting='eval'):
    """Write sim_items_obs: for each rated movie, a block of its 25 most
    cosine-similar movies (similarity from observed ratings only).

    Note: truth_ratings_df is accepted for signature compatibility but not used.
    Each (movie, neighbor) pair is written with value 1.
    """
    similarity = query_relevance_cosine_similarity(
        observed_ratings_df.loc[:, ['rating']].reset_index(),
        'movieId', 'userId')

    # Top 25 neighbors per movie define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=movies, columns=range(25))
    for movie_id in observed_ratings_df.reset_index().movieId.unique():
        top_neighbors.loc[movie_id, :] = similarity.loc[movie_id].nlargest(25).index

    # Movies never rated by any user have no similarity row; drop them.
    top_neighbors = top_neighbors.dropna(axis=0)

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 25)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_items_obs', fold, setting)
def group1_genre_rating_predicate(movies_df, fold='0', phase='eval'):
    """Write group1_genre_rating(G) targets: one atom per genre column, no value."""
    genres = movies_df.columns.difference(['movie title'])
    targets = pd.DataFrame(index=genres)
    write(targets.loc[:, []], 'group1_genre_rating_targets', fold, phase)
def average_user_rating_predicate(observed_ratings_df, fold='0', phase='eval'):
    """Write avg_user_rating_obs: each user's mean observed rating."""
    per_user_mean = (observed_ratings_df.loc[:, 'rating']
                     .reset_index()[["userId", "rating"]]
                     .groupby("userId")
                     .mean())
    write(per_user_mean, 'avg_user_rating_obs', fold, phase)
def group_member_predicate(user_df, fold='0', phase='eval'):
    """Write group_member(U, G) observations: gender mapped to group id
    (F -> 1, M -> 2), each membership with value 1."""
    members = user_df.loc[:, ['gender']]
    members.loc[:, 'value'] = 1
    members.gender = members.gender.map({'F': 1, 'M': 2})
    write(members, 'group_member_obs', fold, phase)
def get_screenshot(filename=None, device=None, backend=None):
    """Fetch a PNG hardcopy from the instrument and save it to *filename*.

    When no filename is given, an ISO-timestamp name (colons replaced by
    dashes) is used. A running acquisition is stopped and the hardcopy
    format switched to PNG before the transfer.
    """
    inst = get_instrument(device, backend=backend)
    if not filename:
        stamp = dt.now().replace(microsecond=0).isoformat('_').replace(':', '-')
        filename = stamp + '.png'
    # Stop acquisition so the captured screen is stable.
    if query(inst, "ACQuire:STATe?") not in ("STOP", "BRE"):
        write(inst, "STOP")
    if query(inst, "HCOPy:LANGuage?") != "PNG":
        write(inst, "HCOPy:LANGuage PNG")
    payload = ieee_488_2_block_data(inst.query_raw("HCOPy:DATA?"))
    with open(filename, 'wb') as out:
        out.write(payload)
def group_predicate(user_df, fold='0', phase='eval'):
    """Write group(G) observations.

    Used in rules like:
        group(G) & rating(U, I) & target(U, I) & group(G, U)
            >> group_avg_item_rating(G, I)

    The two groups correspond to the gender values ('F' -> 1, 'M' -> 2),
    so the fixed ids [1, 2] are written with value 1. Only user_df's
    convention is relevant here; its data is not read.
    """
    groups = pd.Series(data=1, index=[1, 2])
    write(groups, 'group_obs', fold, phase)
def target_predicate(truth_ratings_df, partition='obs', fold='0', setting='eval'):
    """Write target_<partition> atoms: value 1 for every truth (user, item) pair.

    Used in rules like:
        group(G) & rating(U, I) & target(U, I) & group(G, U)
            >> group_avg_item_rating(G, I)
    """
    targets = truth_ratings_df.loc[:, []]
    targets['value'] = 1
    write(targets, 'target_' + partition, fold, setting)
def changeConfigFile(fileName, boardName):
    """Replace *boardName*'s component config in the session with the POSTed
    JSON body, persist it via h.write, and return "true".

    Non-POST requests fall through and return None.
    """
    if request.method == 'POST':
        submitted = request.json
        data = session.get("data")
        idx = h.findIndex(boardName, data)
        data['components'][idx] = submitted
        session['data'] = data
        h.write(data)
        return "true"
def ratings_predicate(ratings_df, partition='obs', fold='0', setting='eval', write_value=True):
    """Write rating_<partition> atoms; include the rating value only when
    write_value is True (otherwise just the (user, item) index)."""
    rating_frame = ratings_df.loc[:, ['rating']]
    predicate_name = 'rating_' + partition
    if write_value:
        write(rating_frame, predicate_name, fold, setting)
    else:
        write(rating_frame.loc[:, []], predicate_name, fold, setting)
def item_predicate(observed_ratings_df, truth_ratings_df, fold='0', setting='eval'):
    """Write item_obs: value 1 for every movie appearing in either the
    observed or truth ratings."""
    observed = observed_ratings_df.loc[:, 'rating']
    truth = truth_ratings_df.loc[:, 'rating']
    all_items = (pd.concat([observed, truth], join='outer')
                 .reset_index()['movieId']
                 .unique())
    write(pd.Series(data=1, index=all_items), 'item_obs', fold, setting)
def user_predicate(observed_ratings_df, truth_ratings_df, fold='0', phase='eval'):
    """Write user_obs: value 1 for every user appearing in either the
    observed or truth ratings."""
    observed = observed_ratings_df.loc[:, 'rating']
    truth = truth_ratings_df.loc[:, 'rating']
    all_users = (pd.concat([observed, truth], join='outer')
                 .reset_index()['userId']
                 .unique())
    write(pd.Series(data=1, index=all_users), 'user_obs', fold, phase)
def group_average_rating_predicate(user_df, fold='0', phase='eval'):
    """Write group_avg_rating targets for the two gender groups (ids 1 and 2).

    Supports the aggregation rule:
        group_avg_item_rating(G, +I) / |I| = group_avg_rating(G)
            {I: group_item_block(G, I)}
    and the non-parity fairness constraint:
        1.0 : group_avg_rating(G1) = group_avg_rating(G2)

    Group ids correspond to the gender values in user_df ('F'/'M'); its
    data is not read here.
    """
    group_targets = pd.Series(data=1, index=[1, 2])
    write(group_targets, 'group_avg_rating_targets', fold, phase)
def group_denominators(user_df, truth_ratings_df, fold='0', phase='eval'):
    """Write group_denominators_obs: per-gender count of truth ratings.

    Feeds the normalizer |I| in:
        group_avg_item_rating(G, +I) / |I| = group_avg_rating(G)
            {I: group_item_block(G, I)}

    Each truth rating is bucketed by the rating user's gender (looked up
    in user_df) and the ratings per gender are counted.
    """
    flat_ratings = truth_ratings_df.reset_index()
    by_gender = flat_ratings.groupby(
        lambda row: user_df.loc[flat_ratings.loc[row].userId].gender)
    write(by_gender['rating'].count(), 'group_denominators_obs', fold, phase)
def rated_predicate(observed_ratings_df, truth_ratings_df, partition='obs', fold='0', setting='eval'):
    """Write rated_<partition>: value 1 for every (user, movie) pair that has
    a rating in either the observed or truth frames."""
    observed = observed_ratings_df.loc[:, 'rating']
    truth = truth_ratings_df.loc[:, 'rating']
    combined = pd.concat([observed, truth], join='outer')
    # Overwrite in place (rather than rebuilding the Series) so the dtype
    # of the written values matches the original rating column's.
    combined.loc[:, :] = 1
    write(combined, 'rated_' + partition, fold, setting)
def main(args):
    """Tokenize the input file, report lexical errors, and write the token table.

    Exits 0 when the file tokenizes cleanly and 1 when lexical errors were
    found. (Previously this always exited 0, so calling scripts could not
    detect failure; the sibling analyzer `main` already exits 1 on failure.)
    """
    files = helpers.parse_flags(args)
    lines = helpers.read(files["input"])
    lexer = Lexer()

    ok = lexer.try_tokenize(lines)
    if ok:
        print("The file is fine, no error was found")
    else:
        for err in lexer.get_errors():
            print("*** ERROR on line", err.line, "***", err.reason, err.word)

    # The token table is written in both cases for inspection.
    helpers.write(files["output"], lexer.get_all())
    print("\nCheck the file", files["output"], "for more information")
    sys.exit(0 if ok else 1)
def sim_content_predicate(movies_df, fold='0', phase='eval'):
    """Write sim_content_items_obs: for each movie, a block of the 50 movies
    most cosine-similar by genre vector.

    Each (movie, neighbor) pair is written with value 1.
    """
    print("Sim item content predicates")
    genre_df = movies_df.drop('movie title', axis=1)

    # Row-normalize the genre vectors so a single matrix product yields
    # cosine similarity.
    genre_matrix = genre_df.values
    genre_matrix = np.array([row / np.linalg.norm(row) for row in genre_matrix])
    similarity = pd.DataFrame(data=np.matmul(genre_matrix, genre_matrix.T),
                              index=movies_df.index,
                              columns=movies_df.index)

    # Top 50 neighbors per movie define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=movies_df.index, columns=range(50))
    for movie_id in movies_df.index:
        top_neighbors.loc[movie_id, :] = similarity.loc[movie_id].nlargest(50).index

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 50)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_content_items_obs', fold, phase)
def nb_ratings_predicate(observed_ratings_df, truth_ratings_df, user_df, movies_df, fold='0', phase='eval'):
    """Write nb_rating_obs: multinomial naive Bayes rating-class predictions
    for every (user, movie) pair in the truth frame.

    Feature vectors join one-hot user demographics (age/occupation treated
    as categorical, zip dropped) with the movie genre indicators.
    """
    print("Naive Bayes Local Predictor")

    demo_features = pd.get_dummies(
        user_df.drop('zip', axis=1).astype({'age': object, 'occupation': object}))
    genre_features = movies_df.drop('movie title', axis=1)

    print("Building observed user_movie_rating_vector")
    train_vectors = (observed_ratings_df.drop('timestamp', axis=1)
                     .join(demo_features, on='userId')
                     .join(genre_features, on='movieId'))

    print("Building test user_movie_rating_vector")
    test_vectors = (truth_ratings_df.drop('timestamp', axis=1)
                    .join(demo_features, on='userId')
                    .join(genre_features, on='movieId'))

    print("Fitting Naive Bayes predictor")
    # Ratings are stringified so they are treated as class labels.
    nb_model = MultinomialNB(alpha=1.0)
    nb_model.fit(train_vectors.drop('rating', axis=1),
                 train_vectors.rating.astype(str))

    print("Making Naive Bayes predictions")
    predictions = pd.DataFrame(
        nb_model.predict(test_vectors.drop('rating', axis=1)),
        index=test_vectors.index)
    write(predictions, 'nb_rating_obs', fold, phase)
def sim_content_predicate(movies_df, fold='0', setting='eval'):
    """Write sim_content_items_obs: for each movie, a block of the 25 movies
    most cosine-similar by genre vector (explicit MovieLens genre columns).

    Each (movie, neighbor) pair is written with value 1.
    """
    genre_columns = [
        "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
        "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
        "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"
    ]
    genre_matrix = movies_df.loc[:, genre_columns].values

    # Row-normalize so a single matrix product yields cosine similarity.
    genre_matrix = np.array([row / np.linalg.norm(row) for row in genre_matrix])
    similarity = pd.DataFrame(data=np.matmul(genre_matrix, genre_matrix.T),
                              index=movies_df.index,
                              columns=movies_df.index)

    # Top 25 neighbors per movie define the pairwise blocks.
    top_neighbors = pd.DataFrame(index=movies_df.index, columns=range(25))
    for movie_id in movies_df.index:
        top_neighbors.loc[movie_id, :] = similarity.loc[movie_id].nlargest(25).index

    neighbor_ids = top_neighbors.values.flatten()
    anchor_ids = np.repeat(top_neighbors.index.values, 25)
    block_index = pd.MultiIndex.from_arrays([anchor_ids, neighbor_ids])
    write(pd.Series(data=1, index=block_index), 'sim_content_items_obs', fold, setting)
def group_item_block_predicate(user_df, truth_ratings_df, fold='0', phase='eval'):
    """Write group_item_block_obs: whether any member of group G rated item I.

    Supports the aggregation rule:
        group_avg_item_rating(G, +I) / |I| = group_avg_rating(G)
            {I: group_item_block(G, I)}

    Gender 'F' maps to group 1, 'M' to group 2. Per-group slices are also
    written as group_1_item_block_obs / group_2_item_block_obs.
    """
    print("Group Item Block Predicate")
    flat_ratings = truth_ratings_df.reset_index()
    by_gender = flat_ratings.groupby(
        lambda row: user_df.loc[flat_ratings.loc[row].userId].gender)
    movies_per_gender = by_gender['movieId'].unique()

    pairs = [(1, movie) if gender == 'F' else (2, movie)
             for gender, movies in movies_per_gender.to_dict().items()
             for movie in movies]
    block_index = pd.MultiIndex.from_tuples(pairs, names=['group', 'movies'])
    block_df = pd.DataFrame(index=block_index, columns=['value'])
    block_df.value = 1

    write(block_df, 'group_item_block_obs', fold, phase)
    write(block_df.loc[1], 'group_1_item_block_obs', fold, phase)
    write(block_df.loc[2], 'group_2_item_block_obs', fold, phase)