def main():
    # Loading Parameters
    parser = init_parameters()
    args, _ = parser.parse_known_args()

    # Updating Parameters (cmd > yaml > default)
    args = update_parameters(parser, args)

    # Setting save_dir
    save_dir = get_save_dir(args)
    U.set_logging(save_dir)
    with open('{}/config.yaml'.format(save_dir), 'w') as f:
        yaml.dump(vars(args), f)

    # Processing
    if args.generate_data or args.generate_label:
        g = Generator(args)
        g.start()
    elif args.extract or args.visualization:
        if args.extract:
            p = Processor(args, save_dir)
            p.extract()
        if args.visualization:
            v = Visualizer(args)
            v.start()
    else:
        p = Processor(args, save_dir)
        p.start()
def run_naive_bayes_topic(self):
    '''
    This function takes no inputs and returns nothing.
    Function will:
        - Load the corpus to a pandas dataframe.
        - Perform train test split on the dataset.
        - Perform preprocessing on the text and create TF-IDF array of the corpus.
        - Train Naive Bayes model for topic classification on TF-IDF and save as a
          .pkl file to the models directory.
        - Print performance metrics to the console and save a .png file of Confusion Matrix.
    '''
    print("Running Naive Bayes Classification with TF-IDF")
    twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
    viz = Visualizer()
    nb = Naive_Bayes()
    dfc = DF_Cleaner()

    ''' Topic Classification with Naive Bayes '''
    y = twitter.pop('Topic')
    X_train, X_test, y_train, y_test = train_test_split(twitter, y, random_state=42)
    train_text = X_train['TweetText'].to_numpy()
    test_text = X_test['TweetText'].to_numpy()

    X_train_counts, X_train_tfidf = nb.compute_tf_and_tfidf(train_text)
    y_pred = nb.classify(X_train_tfidf, y_train, test_text)
    nb.print_metrics(y_test, y_pred)
    nb.pickle_model(filepath_cv='../models/count_vect_companies.pkl',
                    filepath_clf='../models/naive_bayes_companies.pkl')

    viz.plot_confusion_matrix(y_test, y_pred,
                              classes=['apple', 'google', 'microsoft', 'twitter'],
                              title='Multinomial Naive Bayes with TF-IDF')
    plt.savefig('../media/confusion_matrix/tfidf_nb_confmat_companies.png')
    plt.close()
    print('\n\n')
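# Usage sketch (not part of the original file). Assumptions: the two .pkl
# files written above hold a fitted CountVectorizer and a fitted MultinomialNB;
# depending on how Naive_Bayes.pickle_model and compute_tf_and_tfidf are
# implemented, a TF-IDF transform may also be needed before predict. The input
# tweet below is hypothetical.
import pickle

with open('../models/count_vect_companies.pkl', 'rb') as f:
    count_vect = pickle.load(f)
with open('../models/naive_bayes_companies.pkl', 'rb') as f:
    clf = pickle.load(f)

new_tweets = ["my iphone battery died again"]          # hypothetical input
print(clf.predict(count_vect.transform(new_tweets)))   # predicted topic label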
def main():
    parser = Init_parameters()

    # Update parameters by yaml
    args = parser.parse_args()
    config_path = '/home/aayadi/projet/RA-GCNv22/configs/' + args.config + '.yaml'
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            yaml_arg = yaml.load(f, Loader=yaml.FullLoader)
            default_arg = vars(args)
            for k in yaml_arg.keys():
                if k not in default_arg.keys():
                    raise ValueError('Parameter {} does not exist'.format(k))
            parser.set_defaults(**yaml_arg)
    else:
        raise ValueError('Config {} does not exist'.format(args.config))

    # Update parameters by cmd
    args = parser.parse_args()

    # Show parameters
    print('\n************************************************')
    print('The running config is presented as follows:')
    v = vars(args)
    for i in v.keys():
        print('{}: {}'.format(i, v[i]))
    print('************************************************\n')

    # Processing
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, args.gpus)))
    if args.visualization:
        if args.extract:
            p = Processor(args)
            p.extract()
        print('Starting visualizing ...')
        v = Visualizer(args)
        v.show_wrong_sample()
        v.show_important_joints()
        v.show_heatmap()
        v.show_skeleton()
        print('Finished visualizing!')
    else:
        p = Processor(args)
        p.start()
def run_doc2vec_naivebayes(self):
    '''
    This function takes no inputs and returns nothing.
    Function will:
        - Load data to pandas dataframe.
        - Balance the corpus so that there is an equal amount of tweets for each
          sentiment and drop tweets labeled with irrelevant sentiment.
        - Perform train test split on the dataset.
        - Perform preprocessing on the text and create Doc2Vec array of the corpus.
        - Train Naive Bayes model for sentiment classification on Doc2Vec and save
          as a .pkl file to the models directory.
        - Print performance metrics to the console and save a .png file of Confusion Matrix.
    '''
    print("Running Naive Bayes Classification with Doc2Vec")
    twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
    dfc = DF_Cleaner()
    viz = Visualizer()

    # Balancing and Train Test Split
    pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
    balanced_df = dfc.balance_df([neg_df, neutral_df], pos_df)
    train, test = train_test_split(balanced_df, test_size=0.3, random_state=42)

    ''' Sentiment Classification with Naive Bayes and Doc2Vec '''
    d2v = My_Doc2Vec()
    test_tagged, train_tagged = d2v.tag_doc(test, train)
    d2v.create_model_and_vocab(train_tagged)
    d2v.train_model(test_tagged, train_tagged)
    y_train, X_train = d2v.vec_for_learning(train_tagged)
    y_test, X_test = d2v.vec_for_learning(test_tagged)

    clf = GaussianNB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print('Testing accuracy %s' % accuracy_score(y_test, y_pred))
    print('Testing F1 score: {}'.format(f1_score(y_test, y_pred, average='weighted')))
    d2v.pickle_model(clf, filepath='../models/doc2vec_naive_bayes.pkl')

    viz.plot_confusion_matrix(y_test, y_pred,
                              classes=['positive', 'negative', 'neutral', 'irrelevant'],
                              title='Gaussian Naive Bayes with Doc2Vec')
    plt.savefig('../media/confusion_matrix/d2v_nb_confmat.png')
    plt.close()
def run_naive_bayes_sentiment(self):
    '''
    This function takes no inputs and returns nothing.
    Function will:
        - Load data to pandas dataframe.
        - Balance the corpus so that there is an equal amount of tweets for each
          sentiment and drop tweets labeled with irrelevant sentiment.
        - Perform train test split on the dataset.
        - Perform preprocessing on the text and create TF-IDF array of the corpus.
        - Train Naive Bayes model for sentiment classification on TF-IDF and save as a .pkl file.
        - Print performance metrics to the console and save a .png file of Confusion Matrix.
    '''
    print("Running Naive Bayes Classification with TF-IDF")
    twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
    viz = Visualizer()
    nb = Naive_Bayes()
    dfc = DF_Cleaner()

    ''' Sentiment Classification with Naive Bayes '''
    pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
    balanced_df = dfc.balance_df([neg_df, neutral_df], pos_df)
    y = balanced_df.pop('Sentiment')
    X_train, X_test, y_train, y_test = train_test_split(balanced_df, y, random_state=42)
    train_text = X_train['TweetText'].to_numpy()
    test_text = X_test['TweetText'].to_numpy()

    X_train_counts, X_train_tfidf = nb.compute_tf_and_tfidf(train_text, ngram_range=(1, 5))
    y_pred = nb.classify(X_train_tfidf, y_train, test_text)
    nb.print_metrics(y_test, y_pred)
    nb.pickle_model(filepath_cv='../models/count_vect_sent.pkl',
                    filepath_clf='../models/naive_bayes_sent.pkl')

    viz.plot_confusion_matrix(y_test, y_pred,
                              classes=['positive', 'negative', 'neutral'],
                              title='Multinomial Naive Bayes with TF-IDF')
    plt.savefig('../media/confusion_matrix/tfidf_nb_confmat_sentiment.png')
    plt.close()
    print('\n\n')
def visualize_3d(config, event_df, withNN=False, vertex_stats=None):
    cfg_vis = config['visualize']
    assert cfg_vis['mode'] == '3d'

    visualizer_all_tracks = Visualizer(event_df, 'ALL TRACKS')
    visualizer_lost_tracks = Visualizer(event_df, 'LOST TRACKS')
    visualizer_found_tracks = Visualizer(event_df, 'FOUND TRACKS')
    visualizer_all_tracks.add_coord_planes(config['stations_sizes'])
    visualizer_lost_tracks.add_coord_planes(config['stations_sizes'])
    visualizer_found_tracks.add_coord_planes(config['stations_sizes'])

    if withNN:
        event_df_tracks = event_df[event_df.track != -1]
        batch_tracks_hits, batch_track_idx, short_tracks, short_tracks_idxs, \
            short_track_ellipses, lost_tracks, track_lost_last_ellipse = reconstruct_event(
                event_df,
                get_nn(config['network']),
                6,
                config['z_stations'],
                config['stations_sizes'],
                vertex_stats=vertex_stats)
        #visualizer.init_draw(reco_tracks=batch_track_idx)
        visualizer_lost_tracks.init_draw(reco_tracks=lost_tracks, draw_all_hits=True)
        for ind, (last_index, ell) in enumerate(track_lost_last_ellipse):
            visualizer_lost_tracks.add_nn_pred(last_index,
                                               lost_tracks[ind][last_index - 1],
                                               ell[:2], ell[2:])
        visualizer_found_tracks.init_draw(reco_tracks=batch_track_idx)
        visualizer_all_tracks.init_draw(draw_all_tracks_from_df=True)
        visualizer_found_tracks.draw(False)
        visualizer_lost_tracks.draw(False)
        visualizer_all_tracks.draw(True)
    else:
        visualizer = Visualizer(event_df, 'ALL TRACKS')
        visualizer.init_draw(draw_all_tracks_from_df=True)
        visualizer.draw()
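# Usage sketch (not part of the original file). Assumptions: the YAML config
# provides the keys visualize_3d reads above (visualize.mode == '3d',
# stations_sizes, z_stations, network), and event_hits.csv is a hypothetical
# per-event hits table; adjust both paths to the real project layout.
if __name__ == '__main__':
    import yaml
    import pandas as pd

    with open('config.yaml') as f:
        config = yaml.safe_load(f)
    event_df = pd.read_csv('event_hits.csv')
    visualize_3d(config, event_df, withNN=False)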
def run_lda(corpus, num_topics=4, custom_stopwords=False, filepath_wc=None,
            make_vis=True, filepath_lda=None):
    ''' Running LDA with Gensim '''
    cleaner = Gensim_LDA(corpus)
    viz = Visualizer()

    if custom_stopwords:
        # Using custom stopwords
        cleaner.tokenize_corpus(custom_stopwords=True)
        word_count = cleaner.wc_whole_corpus()
        if filepath_wc is None:
            viz.plot_wc(word_count, filepath='media/tf_custom_sw.png')
        else:
            viz.plot_wc(word_count, filepath=filepath_wc)
    else:
        # Using Gensim stopwords
        cleaner.tokenize_corpus()
        word_count = cleaner.wc_whole_corpus()
        if filepath_wc is None:
            viz.plot_wc(word_count, filepath='media/tf_whole_corpus.png')
        else:
            viz.plot_wc(word_count, filepath=filepath_wc)

    cleaner.create_bow()
    lda = cleaner.create_lda_model(num_topics=num_topics)
    cleaner.print_top_words(lda)
    cleaner.print_perplexity_coherence(lda)
    if make_vis:
        viz.make_pyLDAvis(lda, cleaner.bow, cleaner.id2word, filepath=filepath_lda)

    return cleaner, lda
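# Usage sketch (not part of the original file). Assumptions: Gensim_LDA accepts
# the raw tweet text of the same full-corpus.csv used by the other functions in
# this repo; the output paths below are hypothetical.
if __name__ == '__main__':
    import pandas as pd

    tweets = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
    cleaner, lda = run_lda(tweets['TweetText'].to_numpy(),
                           num_topics=4,
                           custom_stopwords=True,
                           filepath_wc='media/tf_custom_sw.png',
                           filepath_lda='media/pyldavis_custom_sw.html')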
def run_visualizer(particle_filter):
    # Build the visualizer around the particle filter and start it
    visualizer = Visualizer(particle_filter)
    visualizer.start()
def test_market():
    # Environment variables
    outdir = '/home/younesz/Desktop/SUM'
    window_state = 32
    open_cost = 3
    time_difference = True
    wavelet_channels = 0
    batch_size = 16
    rootStore = open('../dbloc.txt', 'r').readline().rstrip('\n')

    # Agent specific options
    #agent_opt = {'type': 'DQN', 'acSpace': 5, 'lr': 1e-3, 'nz': 'dummy', 'batch_size': batch_size, 'action_labels': ['short100', 'short50', 'neutral', 'long50', 'long100']}
    agent_opt = {
        'type': 'DDPG',
        'acSpace': 1,
        'lr': [1e-4, 1e-3],
        'nz': 'dummy',
        'batch_size': batch_size,
        'action_labels': ['continuous'],
        'action_conversion': None
    }

    # import market environment
    from src.emulator import Market
    from src.sampler import SinSampler, BTCsampler
    #sampler = SinSampler('single', 180, 1.5, (20, 40), (49, 50), fld=outdir)
    sampler = BTCsampler(False,
                         wavelet_channels=0,
                         variables=['Close'],
                         fld=path.join(rootStore, 'data', 'BTCsampler', 'db_bitcoin.csv'))
    env = Market(sampler,
                 window_state,
                 open_cost,
                 time_difference=time_difference,
                 wavelet_channels=wavelet_channels,
                 action_range=[-1, 1],
                 action_labels=agent_opt['action_labels'])

    # Set agent
    agent = Agent(agent_opt['type'],
                  window_state,
                  agent_opt['acSpace'],
                  layer_units=[80, 60],
                  noise_process=agent_opt['nz'],
                  outputdir=outdir,
                  learning_rate=agent_opt['lr'],
                  batch_size=agent_opt['batch_size'])
    agent.p_model = agent.model
    fld_save = path.join(rootStore, 'results', sampler.title, agent_opt['type'],
                         str((env.window_state, sampler.window_episode, batch_size,
                              agent_opt['lr'], agent.discount_factor, 0, env.open_cost)))

    # Set visualizer
    from src.visualizer import Visualizer
    visualizer = Visualizer(env.action_labels)

    # Set simulator
    from src.simulators import Simulator
    simulator = Simulator(agent, env, visualizer=visualizer, fld_save=fld_save)
    simulator.agent_opt = agent_opt

    # Train
    simulator.train(200,
                    save_per_episode=1,
                    exploration_decay=0.99,
                    learning_rate=agent_opt['lr'],
                    exploration_min=0.05,
                    print_t=False,
                    exploration_init=0.8)

    # Test
    simulator.test(50, save_per_episode=1, subfld='in-sample testing')
def main():
    parser = Init_parameters()

    # Update parameters by yaml
    args = parser.parse_args()
    if os.path.exists('./configs/' + args.config + '.yaml'):
        with open('./configs/' + args.config + '.yaml', 'r') as f:
            yaml_arg = yaml.load(f, Loader=yaml.FullLoader)
            default_arg = vars(args)
            for k in yaml_arg.keys():
                if k not in default_arg.keys():
                    raise ValueError('Parameter {} does not exist'.format(k))
            parser.set_defaults(**yaml_arg)
    else:
        raise ValueError('Config {} does not exist'.format(args.config))

    # Update parameters by cmd
    args = parser.parse_args()

    # Show parameters
    print('\n************************************************')
    #if type(args.gpus) == int:
    #    n = args.gpus
    #    if n == 4:
    #        args.gpus = [0, 1, 2, 3]
    #    else:
    #        args.gpus = [0]
    print('The running config is presented as follows:')
    print_default_keys = ['config', 'batch_size', 'pretrained', 'model_stream']
    print_eval_keys = [
        'occlusion_part', 'occlusion_time', 'occlusion_block', 'occlusion_rand',
        'jittering_joint', 'jittering_frame', 'sigma'
    ]
    v = vars(args)
    if '-g' in sys.argv or '--gpus' in sys.argv:
        aa = args.gpus
        args.gpus = [int(x) for x in aa.split(',')]
    else:
        if node == 'obama':
            args.gpus = [0, 1, 2, 3]
        elif node == 'puma':
            args.gpus = [0]
        else:
            args.gpus = [0]
    for i in v.keys():
        if i in print_default_keys:
            print('{}: {}'.format(i, v[i]))
    if args.evaluate:
        for i in v.keys():
            if i in print_eval_keys:
                if v[i]:
                    print('{}: {}'.format(i, v[i]))
    print('************************************************\n')

    # Processing
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, args.gpus)))
    if args.visualization:
        if args.extract:
            p = Processor(args)
            p.extract()
        print('Starting visualizing ...')
        v = Visualizer(args)
        v.show_wrong_sample()
        v.show_important_joints()
        v.show_heatmap()
        v.show_skeleton()
        print('Finished visualizing!')
    else:
        if args.baseline:
            p = Processor_BS(args)
        else:
            p = Processor(args)
        p.start()
def __init__(self):
    self.l = logging.getLogger(__name__ + "." + self.__class__.__name__)
    cols, rows = shutil.get_terminal_size((100, 50))
    os.environ['COLUMNS'] = str(cols)

    ap = argparse.ArgumentParser()
    subParser = ap.add_subparsers(dest="subap", help='Advanced Options')
    editAp = subParser.add_parser("edit")
    gEditAp = editAp.add_mutually_exclusive_group(required=True)
    insertAp = subParser.add_parser("insert")
    gInsertAp = insertAp.add_mutually_exclusive_group(required=True)

    ap.add_argument("-n", "--stamp-new", dest="stamp_new",
                    help="Starts a new workday", action="store_true")
    ap.add_argument("-p", "--stamp-pause", dest="stamp_pause",
                    help="Pauses the current workday", action="store_true")
    ap.add_argument("-r", "--stamp-resume", dest="stamp_resume",
                    help="Resumes the current workday", action="store_true")
    ap.add_argument("-e", "--stamp-end", dest="stamp_end",
                    help="Ends the workday", action="store_true")
    ap.add_argument("-S", "--display-saldo", dest="display_saldo",
                    help="Displays the time saldo", action="store_true")
    ap.add_argument("-L", "--display-last", dest="display_last",
                    help="Displays latest ended workday", action="store_true")
    ap.add_argument("-D", "--display-day", dest="display_info", default=None,
                    const=time.time(), nargs="?", metavar="dd.mm.yyyy",
                    help="Displays info about a workday")
    ap.add_argument("-W", "--display-week", dest="display_week", default=None,
                    const=time.time(), nargs="?", metavar="dd.mm.yyyy",
                    help="Displays summary of week")
    ap.add_argument("-M", "--display-month", dest="display_month", default=None,
                    const=time.time(), nargs="?", metavar="mm.yyyy",
                    help="Displays summary of month")
    ap.add_argument("-Y", "--display-year", dest="display_year", default=None,
                    const=time.time(), nargs="?", metavar="yyyy",
                    help="Displays summary of the year")
    ap.add_argument("-X", "--display-proc", dest="display_proc",
                    help="Shows how long the calculation took", action="store_true")

    gEditAp.add_argument(
        "-s", "--set-start", dest="set_start",
        metavar=("<HH:MM>", "dd.mm.yyyy"), nargs="+",
        help="Set the start time for the given day. When no day is given, either "
             "the last open day (1st) or the last closed day (2nd) is chosen.")
    gEditAp.add_argument(
        "-e", "--set-end", dest="set_end",
        metavar=("<dd.mm.yyyy:HH:MM>", "dd.mm.yyyy"), nargs="+",
        help="Set the end time for the given day. When no day is given, the last "
             "closed day is chosen.")
    gEditAp.add_argument(
        "-S", "--move-start", dest="move_start",
        metavar=("<<s/+>HH:MM>", "dd.mm.yyyy"), nargs="+",
        help="Moves the start time of the given day (+=forward, s=backward). When "
             "no day is given, either the last open day (1st) or the last closed "
             "day (2nd) is chosen.")
    gEditAp.add_argument(
        "-E", "--move-end", dest="move_end",
        metavar=("<<s/+>HH:MM>", "dd.mm.yyyy"), nargs="+",
        help="Moves the end time of the given day (+=forward, s=backward). When no "
             "day is given, the last closed day is chosen.")
    gInsertAp.add_argument(
        "-n", "--workday", dest="insert_workday",
        metavar=("<dd.mm.yyyy:HH:MM>", "<HH:MM>"), nargs=2,
        help="Inserts a new workday at the given day and time with the specified "
             "length as positive offset.")
    gInsertAp.add_argument(
        "-b", "--break", dest="insert_break",
        metavar=("<dd.mm.yyyy>", "<dd.mm.yyyy:HH:MM>", "<HH:MM>"), nargs=3,
        help="Inserts a new break into the given workday, starting from the given "
             "day and time, with the specified positive offset for the break end")
    #gEditAp.add_argument("-b", "--insert-break", dest="insert_break", metavar=("<dd.mm.yyyy>", "<HH:MM>", "<+HH:MM>"), nargs=3, help="Insert a break into the given day, at the given time with the given offset.")

    now = time.time() * 1000
    SettingsHelper.rangesToArray()
    stamper = Stamper()
    visualizer = Visualizer()
    self.print_head()
    args = ap.parse_args()

    if args.display_saldo:
        visualizer.saldo()
    elif args.display_month:
        if (isinstance(args.display_month, str)):
            args.display_month = datetime.strptime(args.display_month, "%m.%Y").timestamp()
        visualizer.month(args.display_month)
    elif args.display_week:
        if (isinstance(args.display_week, str)):
            args.display_week = datetime.strptime(args.display_week, "%d.%m.%Y").timestamp()
        visualizer.week(args.display_week)
    elif args.display_year:
        if (isinstance(args.display_year, str)):
            args.display_year = datetime.strptime(args.display_year, "%Y").timestamp()
        visualizer.year(args.display_year)
    elif args.display_last:
        visualizer.last()
    elif args.stamp_new:
        stamper.new()
    elif args.stamp_pause:
        stamper.pause()
    elif args.stamp_resume:
        stamper.resume()
    elif args.stamp_end:
        stamper.end()

    if args.display_info:
        if (isinstance(args.display_info, str)):
            ts = datetime.strptime(args.display_info, "%d.%m.%Y").timestamp()
            visualizer.day(ts)
        else:
            visualizer.ongoing()

    if args.subap == "edit":
        if args.set_start and len(args.set_start) >= 1:
            newTime = datetime.strptime(args.set_start[0], "%H:%M").timestamp() + 2208992400  #epoch
            ts = None
            if (len(args.set_start) >= 2):
                ts = datetime.strptime(args.set_start[1], "%d.%m.%Y").timestamp()
            stamper.moveStart(newTime, ts=ts, visualizer=visualizer, noOffset=True)
        elif args.set_end and len(args.set_end) >= 1:
            newTime = 0
            setDirect = False
            setFromDaystart = False
            if ("." in args.set_end[0]):
                newTime = datetime.strptime(args.set_end[0], "%d.%m.%Y:%H:%M").timestamp()
                setDirect = True
            else:
                newTime = datetime.strptime(args.set_end[0], "%H:%M").timestamp() + 2208992400  #epoch
                setFromDaystart = True
            ts = None
            if (len(args.set_end) >= 2):
                ts = datetime.strptime(args.set_end[1], "%d.%m.%Y").timestamp()
            stamper.moveEnd(newTime, ts=ts, visualizer=visualizer,
                            setDirect=setDirect, setFromDaystart=setFromDaystart)
        elif args.move_start and len(args.move_start) >= 1:
            newOffsetStr = args.move_start[0][1:]
            newOffset = datetime.strptime(newOffsetStr, "%H:%M").timestamp() + 2208992400  #epoch
            if (args.move_start[0][:1] == "s"):
                newOffset = newOffset * -1
            ts = None
            if (len(args.move_start) >= 2):
                ts = datetime.strptime(args.move_start[1], "%d.%m.%Y").timestamp()
            stamper.moveStart(newOffset, ts=ts, visualizer=visualizer)
        elif args.move_end and len(args.move_end) >= 1:
            newOffsetStr = args.move_end[0][1:]
            newOffset = datetime.strptime(newOffsetStr, "%H:%M").timestamp() + 2208992400  #epoch
            if (args.move_end[0][:1] == "s"):
                newOffset = newOffset * -1
            ts = None
            if (len(args.move_end) >= 2):
                ts = datetime.strptime(args.move_end[1], "%d.%m.%Y").timestamp()
            stamper.moveEnd(newOffset, ts=ts, visualizer=visualizer)
        elif args.insert_break and len(args.insert_break) == 3:
            pass

    if args.subap == "insert":
        if args.insert_workday:
            insertAt = datetime.strptime(args.insert_workday[0], "%d.%m.%Y:%H:%M").timestamp()
            offset = Utils.convertHMToSeconds(args.insert_workday[1], separator=":")
            stamper.insert_workday(insertAt, offset, setDirect=False, visualizer=visualizer)
        elif args.insert_break:
            insertAt = datetime.strptime(args.insert_break[0], "%d.%m.%Y").timestamp()
            bStart = datetime.strptime(args.insert_break[1], "%d.%m.%Y:%H:%M").timestamp()
            offset = Utils.convertHMToSeconds(args.insert_break[2], separator=":")
            stamper.insert_break(insertAt, bStart, offset, setDirect=False, visualizer=visualizer)

    if (args.display_proc):
        self.l.info("Calculation took " + str((time.time() * 1000) - now) + "ms")
                    type=int,
                    help='Number of units to skip before each vector',
                    default=1,
                    nargs='?')
parser.add_argument('--bound',
                    metavar='bound',
                    type=str,
                    help='Number of values to show. Eg. -10,10',
                    nargs='?',
                    default="-10,10")
parser.add_argument('--prop',
                    metavar='prop',
                    type=int,
                    help='Set this value to change the cutoff for changing color',
                    default=0,
                    nargs='?')

args = parser.parse_args()

if args.mode.upper() == "COLOR":
    v = Visualizer(f_x=args.fx, f_y=args.fy)
    v.plot_color(bound=tuple(map(int, args.bound.split(','))),
                 skip=args.skip,
                 prop=args.prop)
elif args.mode.upper() == "BLACK":
    v = Visualizer(f_x=args.fx, f_y=args.fy)
    # args.boundx is not defined by the visible arguments; use args.bound here
    v.plot(bound=tuple(map(int, args.bound.split(','))),
           skip=args.skip)
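# Invocation sketch (not part of the original file). Assumptions: the truncated
# parser setup above also defines --mode, --fx, --fy and --skip; only --bound
# and --prop are visible in this excerpt, and the script name and expressions
# below are hypothetical.
#
#   python visualizer_cli.py --mode COLOR --fx "x**2" --fy "y" \
#       --bound -10,10 --skip 1 --prop 0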
def make_plots(self):
    '''
    This function takes no inputs and returns nothing.
    Function will:
        - Load data to pandas dataframe.
        - Create bar chart of 20 most common words in the corpus.
        - Create word clouds of words relating to tweets for all the different sentiments,
          all topics, and the whole corpus.
        - Create bar chart of the number of tweets labeled with each sentiment.
        - Create bar chart of the number of tweets labeled with each topic.
        - All plots produced are saved as .png files to the media directory in their
          appropriate subdirectories.
    '''
    print("Creating Plots of the data")
    twitter = pd.read_csv('../data/full-corpus.csv', encoding='utf-8')
    viz = Visualizer()
    dfc = DF_Cleaner()
    pos_df, neg_df, neutral_df, irr_df = dfc.get_sentiment_df(twitter)
    apple_df, google_df, ms_df, twitter_df = dfc.get_topics_df(twitter)

    # Remove stop words and perform lemmatization to create Pandas Series
    processed_docs = twitter['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
    processed_pos = pos_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
    processed_neg = neg_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
    processed_neutral = neutral_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=True))
    processed_apple = apple_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
    processed_google = google_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
    processed_ms = ms_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))
    processed_twitter = twitter_df['TweetText'].apply(lambda x: dfc.preprocess(x, remove_common=False))

    # Converting Pandas Series to numpy array
    doc_array = processed_docs.to_numpy()
    pos_doc = processed_pos.to_numpy()
    neg_doc = processed_neg.to_numpy()
    neutral_doc = processed_neutral.to_numpy()
    apple_doc = processed_apple.to_numpy()
    google_doc = processed_google.to_numpy()
    ms_doc = processed_ms.to_numpy()
    twitter_doc = processed_twitter.to_numpy()

    # Creating dictionary of word counts
    word_counts = dfc.wc_corpus(doc_array)
    pos_wordcounts = dfc.wc_corpus(pos_doc)
    neg_wordcounts = dfc.wc_corpus(neg_doc)
    neutral_wordcounts = dfc.wc_corpus(neutral_doc)

    # Converting Corpus numpy array to one giant string for word cloud
    big_string = dfc.doc_array_to_str(doc_array)
    pos_string = dfc.doc_array_to_str(pos_doc)
    neg_string = dfc.doc_array_to_str(neg_doc)
    neutral_string = dfc.doc_array_to_str(neutral_doc)
    apple_string = dfc.doc_array_to_str(apple_doc)
    google_string = dfc.doc_array_to_str(google_doc)
    ms_string = dfc.doc_array_to_str(ms_doc)
    twitter_string = dfc.doc_array_to_str(twitter_doc)

    print("creating bar plot of word counts")
    viz.plot_wc(word_counts, filepath='../media/tf/tf_whole_corpus.png',
                title='20 Most Common Words in Corpus')

    print("creating word clouds")
    viz.plot_wordcloud(big_string, title="All Tweets",
                       filepath="../media/tf/word_cloud_all_tweets.png")
    viz.plot_wordcloud(pos_string, title="Positive Tweets",
                       filepath="../media/tf/word_cloud_pos_tweets.png")
    viz.plot_wordcloud(neg_string, title="Negative Tweets",
                       filepath="../media/tf/word_cloud_neg_tweets.png")
    viz.plot_wordcloud(neutral_string, title="Neutral Tweets",
                       filepath="../media/tf/word_cloud_neutral_tweets.png")
    viz.plot_wordcloud(apple_string, title="Apple Tweets",
                       filepath="../media/tf/word_cloud_apple_tweets.png")
    viz.plot_wordcloud(google_string, title="Google Tweets",
                       filepath="../media/tf/word_cloud_google_tweets.png")
    viz.plot_wordcloud(ms_string, title="Microsoft Tweets",
                       filepath="../media/tf/word_cloud_ms_tweets.png")
filepath="../media/tf/word_cloud_ms_tweets.png") viz.plot_wordcloud(twitter_string, title="Twitter Tweets", filepath="../media/tf/word_cloud_twitter_tweets.png") print("creating bar plot of sentiments") viz.plot_sentiments_bar() print("creating bar plot of categories") viz.plot_categories_bar() print('\n\n')
ensemble_solver.solve()

field = 0
mean = ensemble_solver.means["value"][field]
square = ensemble_solver.means["square"][field]
step_errors = ensemble_solver.step_errors["value"]
sample_errors = ensemble_solver.sample_errors["value"]
print(f"Max step error = {step_errors[:, -1].max()}")
print(f"Max sample error = {sample_errors[:, -1].max()}")

vis = Visualizer(mean, (0, tmax), lattice,
                 sample_error=ensemble_solver.sample_errors["value"][field],
                 step_error=ensemble_solver.step_errors["value"][field])
vis2 = Visualizer(square, (0, tmax), lattice,
                  sample_error=ensemble_solver.sample_errors["square"][field],
                  step_error=ensemble_solver.step_errors["square"][field])

mesh_points = min([30, points, steps])
fig, ax = vis.surface(cstride=points // mesh_points,
                      rstride=int(steps / resolution) // mesh_points)
fig2, ax2 = vis.steady_state(label="Numerical solution", marker='.',