def create_cast_plot():
    links, nodes = get_data()
    dnd = create_graph_object(links, nodes)
    fig = create_plot(links, nodes, dnd)
    # fig.write_html("output/campaign-cast.html")
    return fig
def main():
    # Initialize data and set X and y
    mcd_main = get_data()
    T40 = drop_nulls(mcd_main, ['T40.4'])
    T40_complete = impute_df(T40, KNN(5))
    y = T40_complete['T40.4']
    X = T40_complete.drop(columns=[
        'T40.4', 'year', 'county_code', 'T40.7',
        'poverty_rate_native_american', 'poverty_rate_pacific_islander',
        'college_degree', 'poverty_rate'
    ])

    # Create regression models for comparison
    l1_ratio = np.linspace(0.1, 1, 100)
    cv = 5  # Number of k-fold cross validations
    alphas = np.linspace(0.1, 100, 100)
    elastic = LinearDataset(X, y, ElasticNetCV(l1_ratio=l1_ratio),
                            name='ElasticNet')
    ridge = LinearDataset(X, y, RidgeCV(cv=cv, alphas=alphas), name='Ridge')
    lasso = LinearDataset(X, y, LassoCV(cv=cv), name='Lasso')
    linear = LinearDataset(X, y, LinearRegression(), name='linear')
    models = [linear, elastic, ridge, lasso]

    # Compare models
    coef_matrix, error_matrix = model_comparison(X, y, models)
    print(tabulate(coef_matrix.round(2), headers='keys', tablefmt='pipe'))
    print(tabulate(error_matrix.round(2), headers='keys', tablefmt='pipe'))
    all_plot_actual_predicted(models)

    # Plot coefficient path for selected model
    fig, ax = plt.subplots()
    lasso.plot_coeff_paths(ax=ax, c_title='Lasso ')
    plt.show()
def get_baseline(all=True):
    """Persistence baseline: predict that each value equals the previous one."""
    years, past_values, values = get_data()
    train_x, train_y, test_x, test_y = train_test_split(past_values, values)

    pred = train_x
    train_score = mean_squared_error(train_y, pred)
    print('Baseline Training Score: RMSE: %s' %
          '{:,.0f}'.format(math.sqrt(train_score)))

    pred = test_x
    test_score = mean_squared_error(test_y, pred)
    print('Baseline Test Score: RMSE: %s' %
          '{:,.0f}'.format(math.sqrt(test_score)))

    bttscore = 'RMSE: %s/%s' % ('{:,.0f}'.format(math.sqrt(train_score)),
                                '{:,.0f}'.format(math.sqrt(test_score)))

    if all:
        plot_y = [i for i in train_y] + [x for x in test_y]
        plot_pred = [i for i in train_x] + [x for x in test_x]
    else:
        plot_y = [None for i in train_y] + [x for x in test_y]
        plot_pred = [None for i in train_x] + [x for x in test_x]
    return np.array(plot_y), np.array(plot_pred), np.array(years), bttscore
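
# The train_test_split used above is not sklearn's (note the
# train_x, train_y, test_x, test_y return order). A minimal sketch of what
# it presumably does, assuming a chronological 80/20 split with no
# shuffling, which is appropriate for time series:
def train_test_split(X, Y, train_frac=0.8):
    cut = int(len(X) * train_frac)
    return X[:cut], Y[:cut], X[cut:], Y[cut:]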
def get_params(script='train.py'):
    # 'ploth' is the optional fourth argument that turns on history plotting.
    xa = 'ploth'
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size %s' % (script, xa))
        exit(1)
    try:
        plot = sys.argv[4]
    except IndexError:
        plot = False
    return name, int(epochs), int(batches), plot


if __name__ == '__main__':
    X, Y = get_data()
    train_x, train_y, test_x, test_y = prep_data(X, Y)

    # Getting our command line parameters
    name, epochs, batches, plot = get_params()

    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches, test_x, test_y)

    # Save the model and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')

    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)

    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set).
    print('Scores for %s' % title)
    # Notice that we need to specify batch_size in evaluate when we're
    # using LSTM.
from prep import get_data, KMeansSoft
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans


def get_cost(m, X, responsibilities):
    """Responsibility-weighted sum of distances from each point to each mean."""
    k = len(m)
    dist = np.array([[x] * k for x in X])
    dist = (np.sum(((dist - m)**2), axis=2)**0.5) * responsibilities
    return dist.sum()


if __name__ == '__main__':
    X, y = get_data(num_clusters=10, num_samples=200, num_features=100)
    costs = []
    for k in [1, 2, 3, 5, 8, 15, 30]:
        kmeans = KMeansSoft(k)
        # sklearn's KMeans, fitted alongside for comparison (unused below).
        KMeans_2 = KMeans(n_clusters=k).fit(X)
        y_pred, m, responsibilities = kmeans.fit(X, 5)
        cost = get_cost(m, X, responsibilities)
        costs.append(cost)
    plt.plot(costs)
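    # Label the elbow plot so the cost-vs-k trend is readable (a small
    # addition; the tick labels mirror the k values tried above).
    plt.xticks(range(7), [1, 2, 3, 5, 8, 15, 30])
    plt.xlabel('number of clusters k')
    plt.ylabel('responsibility-weighted cost')
    plt.show()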
import numpy as np
import os

# These helpers live elsewhere in the project; the module names below are
# assumptions based on the rest of the repo (train.py and prep.py), plus
# Keras' load_model for restoring the saved network.
from keras.models import load_model
from train import get_params, confs
from prep import get_data

if __name__ == '__main__':
    name, epochs, batches, _ = get_params(script='predict.py')
    model, _ = confs[name]
    mname = 'models/model-%s-%d-%d.h5' % (name, epochs, batches)

    # Loading the model.
    if os.path.exists(mname):
        model = load_model(mname)
        print('Model loaded!')
    else:
        print("Can't find %s model, train it first using 'train.py %s %d %d'"
              % (mname, name, epochs, batches))
        exit(1)

    years, _, values = get_data()

    # We're using the last value of our dataset
    # as a base for prediction.
    values, years = list(values), list(years)
    predict_on_value = values[-1:]
    # Label for the period after the last year in the data.
    predict_for_year = years[-1] + 'next'

    # This is where the magic happens:
    # we get the predicted value.
    x = model.predict(np.array(predict_on_value))

    # Calculating the error band; our best models
    # were off by roughly 2-3%, so use a 2% band here.
    error = x[0][0] * 0.02

    print('Prediction from %s based on %s' %
          (years[-1], "{:,.0f}".format(predict_on_value[0])))
    print('Prediction for %s is %s +/- %s' %
          (predict_for_year, '{:,.0f}'.format(x[0][0]),
           '{:,.0f}'.format(error)))
parser.add_argument(
    '--msl',
    '-m',
    nargs='+',
    type=int,
    default=[1],
)
parser.add_argument(
    '--lspn',
    '-l',
    type=str2bool,
    default='true',
)
FLAGS, unparsed = parser.parse_known_args()

data, ncat = get_data(FLAGS.dataset)

# min_sample_leaf is the minimum number of samples at leaves.
# Define which values of min_sample_leaf to test; keep only values
# smaller than a tenth of the dataset size.
min_sample_leaves = FLAGS.msl
min_sample_leaves = [
    msl for msl in min_sample_leaves if msl < data.shape[0] / 10
]

# filepath = os.path.join('missing', FLAGS.dataset + '_test.csv')
meanspath = os.path.join('missing', FLAGS.dataset + '_means.csv')
cispath = os.path.join('missing', FLAGS.dataset + '_cis.csv')
Path('missing').mkdir(parents=True, exist_ok=True)

df_all = pd.DataFrame()
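
# str2bool is used above but defined elsewhere in the script; argparse has
# no built-in boolean parser, so it is presumably a helper along these
# lines (a sketch, not necessarily the exact version):
def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')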
if __name__ == '__main__':
    import sys
    style_img_name, content_img_name, out_img_name = (sys.argv[1], sys.argv[2],
                                                      sys.argv[3])

    # How much content should be "visible":
    # heavier/higher weight = more content details.
    content_weight = 1
    try:
        content_weight = int(sys.argv[4])
    except IndexError:
        pass

    # Get style and content image tensors.
    style_img, content_img = get_data(style_img_name, content_img_name)

    # Create input image.
    input_img = content_img.clone()

    # Load pretrained network.
    cnn = models.vgg19(pretrained=True).features.to(device).eval()

    # Get blended image.
    output = run_style_transfer(cnn, style_img, content_img, input_img,
                                content_weight=content_weight)

    print('Saving an output image to %s...' % out_img_name)
    # Remove an extra dimension that we needed to add
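    # A plausible completion of the save step, since the snippet cuts off
    # here (assumed, not the original code): torchvision's save_image
    # writes the tensor to disk, and squeeze(0) drops the batch dimension
    # that the network required.
    from torchvision.utils import save_image
    save_image(output.squeeze(0).cpu(), out_img_name)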
        guess, _ = get_category(output, categories)
        stats_total[cat] += 1
        if guess == cat:
            stats_correct[cat] += 1

    for c in categories:
        print('Test accuracy for %s on %d words (%d correct): %d%%' %
              (c, stats_total[c], stats_correct[c],
               100 * stats_correct[c] / stats_total[c]))


if __name__ == '__main__':
    # Initialize our language detector
    rnn = RNN(n_letters, n_categories)
    # Initialize optimizer
    optimizer = torch.optim.Adam(rnn.parameters())
    # Initialize our loss function
    loss_function = nn.CrossEntropyLoss()

    # Get training data
    print('Getting training data...')
    categories, train_words = get_data()

    # Train using 10000 words chosen randomly; in general
    # we get around 50% of the words for each language.
    train(rnn, optimizer, loss_function, 10000, categories, train_words)

    # Get test data; don't include words from the training set.
    print('Getting test data...')
    test_categories, test_words = get_data_test(
        exclude_words=[train_words[c] for c in categories])

    # Test our model on a totally fresh and unique list of words.
    test(rnn, optimizer, test_categories, test_words)

    # Save our model, so we can use it for detection later.
    torch.save(rnn.state_dict(), 'model.ckpt')
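
# get_category isn't shown in this file; presumably it picks the
# highest-scoring class from the RNN output tensor and maps it back to a
# name, along these lines (a sketch, not necessarily the exact version):
def get_category(output, categories):
    _, top_i = output.topk(1)
    idx = top_i[0].item()
    return categories[idx], idx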
    for i in range(n):
        pair = random.choice(pairs)
        print('Question in %s: %s' % (ilang.name, pair[0].ljust(20)))
        print('Question in %s: %s' % (olang.name, pair[1].ljust(20)))
        output_words = test(encoder, decoder, pair[0], ilang, olang)
        output_sentence = ' '.join(output_words).strip()
        tick = 'V' if output_sentence == pair[1] else 'X'
        print('Our guess:%s %s' % (output_sentence.ljust(20), tick))
        print('')


if __name__ == '__main__':
    hidden_size = 256  # Size of the GRU hidden state (assumed default).

    # Get at most 100 sentences. Remember that in prep.py we keep
    # only questions that match specific criteria.
    pairs, input_lang, output_lang = get_data('en', 'spa', limit=100)

    # Build two GRUs: encoder and decoder.
    encoder = EncoderGRU(input_lang.n_words, hidden_size).to(device)
    decoder = DecoderGRU(hidden_size, output_lang.n_words).to(device)

    print('Training models...')
    train_all(pairs, encoder, decoder, input_lang, output_lang, 900,
              print_every=100)

    print('Saving both models...')
    torch.save(encoder.state_dict(), 'encoder.ckpt')
    torch.save(decoder.state_dict(), 'decoder.ckpt')

    print('Testing with random data...')
import pandas as pd
from prep import get_data
from stats import check_chi, check_normal_dist
import seaborn as sns
from scipy.stats import ttest_ind

df = get_data()

# Two columns are not normally distributed: employeecount and standardhours.
check_normal_dist(df)
df.employeecount.describe()  # all == 1
df.standardhours.describe()  # all == 80
# Both are constant, so they carry no information; drop them.
df.drop(columns=["employeecount", "standardhours"], inplace=True)

for col in df.columns:
    if df[col].dtype.name == "category":
        print(f"checking: {col}")
        check_chi(df.attrition, df[col])
# jobrole: dependent, H0 rejected.
# joblevel: dependent, H0 rejected.
# overtime: dependent, H0 rejected.
# department: dependent, H0 rejected.
# maritalstatus: dependent, H0 rejected.
# businesstravel: dependent, H0 rejected.
# jobinvolvement: dependent, H0 rejected.
# educationfield: dependent, H0 rejected.
# jobsatisfaction: dependent, H0 rejected.
# worklifebalance: dependent, H0 rejected.
# stockoptionlevel: dependent, H0 rejected.
# environmentsatisfaction: dependent, H0 rejected.
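
# check_chi lives in the project's stats module; it is presumably a thin
# wrapper around scipy's chi-square test of independence, along these
# lines (a hypothetical sketch, not the project's exact code):
def check_chi_sketch(a, b, alpha=0.05):
    from scipy.stats import chi2_contingency
    _, p, _, _ = chi2_contingency(pd.crosstab(a, b))
    verdict = ('dependent, H0 rejected' if p < alpha
               else 'independent, H0 not rejected')
    print(f'{b.name}: {verdict} (p={p:.4f})')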
def get_params(script='train.py'):
    """ Get command line parameters. """
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size' % sys.argv[0])
        exit(1)
    return name, int(epochs), int(batches)


if __name__ == '__main__':
    # Getting our command line parameters
    name, epochs, batches = get_params()

    train_x, train_y, test_x, test_y, inputs, max_length, t = get_data(
        do_cleanup=True, filter_stopwords=True)
    print('Train/Test data length:', len(train_x), len(test_x))

    model, name, mp = train_model(name, train_x, train_y, epochs, batches,
                                  inputs, max_length, test_x, test_y)

    # Save the model and tokenizer to use for classification later on
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    with open(mname + '-tokenizer.pickle', 'wb') as ts:
        pickle.dump(t, ts)

    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)

    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set)
    print('Evaluation for %s' % title)
    loss, acc = model.evaluate(train_x, train_y, verbose=2)
    print('Train Accuracy: %.2f%%' % (acc * 100))
    loss, acc = model.evaluate(test_x, test_y, verbose=2)
    print('Test Accuracy: %.2f%%' % (acc * 100))
class NB:
    """Naive Bayes spam classifier over word-count features."""

    def __init__(self):
        self.s = 0
        self.h = 0

    def train(self, X, y):
        n = X.shape[0]
        self.spam = (X[y == 1] / 100).sum(axis=0) / n
        self.ham = (X[y == 0] / 100).sum(axis=0) / n
        self.s = len(X[y == 1])
        self.h = len(X[y == 0])

    def score(self, X, y):
        # Compare the two (unnormalized) log-posteriors and pick the larger.
        y_pred = np.argmax(np.vstack((X @ np.log(self.ham) + np.log(self.h),
                                      X @ np.log(self.spam) + np.log(self.s))),
                           axis=0)
        return (y == y_pred).mean()


if __name__ == '__main__':
    X, y = get_data()
    X_train, y_train = X[:3000], y[:3000]
    X_test, y_test = X[3000:], y[3000:]
    nb = NB()
    nb.train(X_train, y_train)
    score = nb.score(X_test, y_test)
    print(score)
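    # Why the argmax in score() implements Bayes' rule: for class c with
    # word-rate vector p_c and prior count n_c, the unnormalized
    # log-posterior of a document x is x @ log(p_c) + log(n_c); the shared
    # log(n) normalizer drops out of the argmax, so raw counts work as
    # priors. A tiny sanity check on made-up, strictly positive counts
    # (hypothetical data, just to show the mechanics):
    demo_X = np.array([[2.0, 1.0], [1.0, 3.0]])
    demo_y = np.array([0, 1])
    demo = NB()
    demo.train(demo_X, demo_y)
    print('demo accuracy:', demo.score(demo_X, demo_y))  # expect 1.0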
def train_model(train_x, train_y, epochs, batches):
    model = get_mlp(train_x.shape[1])
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'mape'])
    model.fit(train_x, train_y, verbose=2, epochs=epochs, batch_size=batches)
    return model


def r2_score(y_test, y_pred):
    # R^2 = 1 - SS_res / SS_tot; note that the total sum of squares uses
    # the mean of the *true* values, not of the predictions.
    return 1 - sum((y_test - y_pred) ** 2) / sum((y_test - y_test.mean()) ** 2)


if __name__ == '__main__':
    data = get_data()
    X, Y, years = get_xy(data, 2)
    train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.2)
    epochs, batches = 64, 2
    model = train_model(train_x, train_y, epochs, batches)
    y_pred = np.array(model.predict(test_x)).flatten()
    print(r2_score(test_y, y_pred))

    y_plt = np.array(model.predict(X).flatten())
    plt.plot(years, y_plt)
    plt.plot(years, Y)
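    # Optional cross-check of the hand-rolled r2_score against
    # scikit-learn's implementation (not in the original script):
    from sklearn.metrics import r2_score as sk_r2
    assert abs(r2_score(test_y, y_pred) - sk_r2(test_y, y_pred)) < 1e-6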
# Number of training epochs that worked best for each company.
optimal_epochs_map = {
    'Amazon': 450,
    'Apple': 50,
    'Dell': 20,
    'Facebook': 55,
    'Google': 410,
    'Microsoft': 45,
    'Tesla': 150,
    'Twitter': 20,
    'Wallmart': 40
}

for cname in comp_list:
    data_path = 'data/' + cname + '/' + cname + '.csv'
    X, Y = get_data(data_path)
    train_x, train_y, test_x, test_y = prep_data(X, Y)

    # Fixed parameters instead of command line ones:
    # name, epochs, batches, plot = get_params()
    name = "default"
    epochs = optimal_epochs_map[cname]
    batches = 1
    # plot = "ploth"  # set this instead of False to plot the history
    plot = False

    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches, test_x, test_y)

    # Save models and the training history for later use
    mname = 'models/' + cname + '/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
import dash_table

# Sub-modules
from prep import get_data
from homepage import Homepage
from appA import AppA, build_graphA1, build_graphA2, build_graphA3, split_filter_part
from appB import AppB, core_layoutB, build_graphBA1, build_graphBA2, build_graphBA3
from interact import AppC, build_graphC1

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.COSMO])
app.config.suppress_callback_exceptions = True

df, df_num, df_noncumun_whole, noncumun_dfs = get_data()

app.layout = html.Div(
    [dcc.Location(id='url', refresh=False),
     html.Div(id='page-content')])


# NavBar
@app.callback(Output('page-content', 'children'), [Input('url', 'pathname')])
def display_page(pathname):
    if pathname == '/Pipe-dreams':
        return AppA()
    if pathname == '/Scattergories':
        return AppB()
    if pathname == '/Interact':
        return AppC()
def get_params(predict=False):
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size filename' % sys.argv[0])
        exit(1)
    filename = None
    if predict:
        try:
            filename = sys.argv[4]
        except IndexError:
            pass
    return name, int(epochs), int(batches), filename


if __name__ == '__main__':
    # Getting our command line parameters
    name, epochs, batches, _ = get_params()

    # Getting our images correctly converted
    # to the right format of arrays/matrices.
    train_x, train_y, inputs, classes = get_data()

    # Time for training!
    model, name, mp, train_x, train_y, test_x, test_y = train_model(
        name, train_x, train_y, epochs, batches, inputs, classes)

    # Save model to use for classification later on.
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')

    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    print('Evaluation for %s' % title)
    loss, acc = model.evaluate(train_x, train_y, verbose=2)
    print('Train accuracy: %.2f%%' % (acc * 100))
    loss, acc = model.evaluate(test_x, test_y, verbose=2)
    print('Test accuracy: %.2f%%' % (acc * 100))
    # Loss is our loss or cost function - mean_squared_error
    # is a good choice assuming we don't have a lot of "outliers"
    # in our dataset.
    # The Adam optimizer works great for most problems.
    #
    # Metrics are the loss metrics we want computed at each epoch,
    # so we can review how we're doing at each training stage.
    # mse is mean_squared_error, mape is mean_absolute_percentage_error.
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'mape'])

    # Here we're starting our training
    history = model.fit(train_x, train_y, verbose=2, epochs=epochs,
                        batch_size=batches)
    return model, name, mparams, history


if __name__ == '__main__':
    # Getting data formatted as a supervised problem
    years, past_values, values = get_data()
    X, Y = past_values, values

    # Split data into two parts: one for training, one for testing.
    # The test part won't be seen by the model during training, so it
    # gives us some idea of how the model performs on unseen data.
    train_x, train_y, test_x, test_y = train_test_split(X, Y)

    # Getting our command line parameters
    name, epochs, batches, plot = get_params()

    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches)

    # Save the model and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    with open(mname + '-history.pickle', 'wb') as ms:
        pickle.dump(history.history, ms)

    print()
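    # Later, the saved history can be loaded back for plotting, e.g.
    # (a small usage sketch; assumes matplotlib.pyplot is imported as plt):
    with open(mname + '-history.pickle', 'rb') as ms:
        hist = pickle.load(ms)
    plt.plot(hist['loss'], label='training loss')
    plt.legend()
    plt.show()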
as well as a custom CNN model if it's available.

The custom CNN model has to be available in the model.ckpt file.
You can generate that file by running the ./train.py script.
"""
import torch
from torchvision import transforms, models
from PIL import Image
import os.path

from train import BeaverNet
from prep import get_data

# Getting the mapping between class indices and their names.
_, _, cifar100_classes = get_data()


def get_imgnet_classes():
    """
    Get labels/classes for ImageNet (AlexNet).
    Source: https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
    """
    return eval(open('imagenet1000_clsid_to_human.txt').read())


def prep_pretrained(imgf):
    """
    Process an image so it can be used with pretrained
    models available in PyTorch (including AlexNet).