import os
import shutil
import subprocess
import configparser

import data  # local data-prep module (assumed importable as 'data')


def main(expdir, recipe, computing):
    '''main method'''

    if recipe is None:
        raise Exception('no recipe specified. Command usage: '
                        'nabu data --recipe=/path/to/recipe')
    if not os.path.isdir(recipe):
        raise Exception('cannot find recipe %s' % recipe)
    if expdir is None:
        raise Exception('no expdir specified. Command usage: '
                        'nabu data --expdir=/path/to/expdir '
                        '--recipe=/path/to/recipe')
    if computing not in ['standard', 'condor']:
        raise Exception('unknown computing mode: %s' % computing)

    #read the data conf file
    parsed_cfg = configparser.ConfigParser()
    parsed_cfg.read(os.path.join(recipe, 'database.conf'))

    #loop over the sections in the data config
    for name in parsed_cfg.sections():
        print('processing %s' % name)

        #read the section
        conf = dict(parsed_cfg.items(name))

        if not os.path.exists(conf['dir']):
            os.makedirs(conf['dir'])
        else:
            print('%s already exists, skipping this section' % conf['dir'])
            continue

        #create the expdir for this section
        if not os.path.isdir(os.path.join(expdir, name)):
            os.makedirs(os.path.join(expdir, name))

        #create the database configuration
        dataconf = configparser.ConfigParser()
        dataconf.add_section(name)
        for item in conf:
            dataconf.set(name, item, conf[item])
        with open(os.path.join(expdir, name, 'database.conf'), 'w') as fid:
            dataconf.write(fid)

        #copy the processor config
        shutil.copyfile(
            conf['processor_config'],
            os.path.join(expdir, name, 'processor.cfg'))

        if computing == 'condor':
            #submit the data preparation to the HTCondor cluster
            if not os.path.isdir(os.path.join(expdir, name, 'outputs')):
                os.makedirs(os.path.join(expdir, name, 'outputs'))
            subprocess.call(['condor_submit',
                             'expdir=%s' % os.path.join(expdir, name),
                             'nabu/computing/condor/dataprep.job'])
        else:
            #run the data preparation locally
            data.main(os.path.join(expdir, name))

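# A minimal sketch of what the dataprep.job submit file referenced above
# could look like, assuming standard HTCondor submit syntax (the executable
# name is hypothetical; expdir is supplied on the condor_submit command line
# and expanded with $(expdir)):
#
#   universe   = vanilla
#   executable = nabu/computing/condor/dataprep.sh
#   arguments  = $(expdir)
#   output     = $(expdir)/outputs/dataprep.out
#   error      = $(expdir)/outputs/dataprep.err
#   log        = $(expdir)/outputs/dataprep.log
#   queue
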
import streamlit as st  # da and ma (local EDA and predictor modules) are imported elsewhere


def main():
    # EDA
    da.main()

    st.header("SVM_RBF Predictor")

    # Predictor
    ma.main()

def main():
    data.main()

    create_unifo_table()
    content_list = data.content
    url_list = data.url_list
    for i in range(len(content_list)):
        insert_data(content_list[i][0], content_list[i][1], content_list[i][2])

    create_url_table()
    for i in range(len(url_list)):
        insert_url(url_list[i])

def main(script):
    """Run all the functions in this module."""
    data.main()  # Populate all objects

    # Generate a LaTeX table with all stopwords
    _generate_columned_table(sorted(data.get_stopwords()), 6,
                             'stopwords', 'Norwegian stopwords')

    # Generate a LaTeX table with all medical terms
    _generate_columned_table(sorted(data.get_medical_terms()), 3,
                             'medicalterms', 'Medical terms')

    generate_cases_table()
    calculate_chapter_statistics()
    calculate_case_statistics()

import plotly.plotly as py   # legacy plotly API (assumed imports)
import plotly.tools as tls
import data as d             # local data module (assumed alias)


def main():
    try:
        credentials = tls.get_credentials_file()
    except:
        # no stored credentials; prompt the user to enter them
        credentials = {}
        credentials['username'] = raw_input("Plotly Username: ")
        credentials['api_key'] = raw_input("api key: ")
    py.sign_in(credentials['username'], credentials['api_key'])

    survey_file = "survey.csv"
    run_data = d.main()
    for runner in run_data.runners:
        runner.make_data()
        print runner.median
        #print runner.num, runner.total, runner.count, runner.avg, runner.dur, runner.mpd, runner.rpd

    INDEX = completeSurvey()

    #SD is a SurveyData object, has all of the respondents
    SD = read_survey(survey_file, run_data, INDEX)
    mydict = SD.makeDictionary()
    SD.groupSocial()
    SD.groupStarter()
    SD.groupQ1()
    SD.groupQ2()

    #list of runners that did not respond
    nonResponders = sort(run_data, SD)
    #list of runners that did respond
    surveyResponders = SD.responses

    plotQ1(SD)
    #plotQ2(SD)
    #starters(SD)
    plotSocial(SD)

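# For reference, tls.get_credentials_file() reads plotly's stored JSON
# credentials (historically ~/.plotly/.credentials); a minimal sketch,
# assuming only the two fields used above (exact layout may vary by
# plotly version):
#
#   {
#       "username": "your_plotly_username",
#       "api_key": "your_api_key"
#   }
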
def main():
    import data
    data = data.main()

    # one model per key in the data dict
    models = {c: MainClauseModel() for c in data.iterkeys()}

    for c, m in models.iteritems():
        print c, '\n'
        m.fit(data[c])
        print '\n'
        break  # only fit the first model for now

    return data, models

def main():
    import data
    data = data.main()

    exp = MainClauseExperiment(data)
    exp.run()

    verbreps, projection = exp.results

    verbs = ['want', 'see', 'know', 'think', 'say',
             'like', 'tell', 'try', 'need', 'remember']

    verbreps[verbreps.verb.isin(verbs)].to_csv(
        '../bin/results/verbreps_results.csv', index=False)
    projection.to_csv('../bin/results/projection_results.csv', index=False)

    return exp

from io import StringIO

import boto3

import data  # local ETL module whose main() returns a pandas DataFrame


def hello(event, context):
    # Data Module
    df = data.main()

    # Save to S3
    bucket = 'covid-19-data-etl-timothygithinji'

    # Convert dataframe to CSV
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, encoding='utf-8', header=False, index=False)

    # S3 Client
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket, 'data/data.csv').put(Body=csv_buffer.getvalue())

    # SNS Client
    sns = boto3.client('sns')
    topic_arn = 'arn:aws:sns:us-east-1:144272576793:covid-19-data-etl'
    message = 'New data saved to S3 bucket'
    sns.publish(TopicArn=topic_arn, Message=message)

    return {"message": "New data saved to S3 bucket"}

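# A quick local smoke test for the handler above; a minimal sketch assuming
# AWS credentials with S3 and SNS permissions are configured in the
# environment (the handler ignores event and context, so stubs suffice):
if __name__ == '__main__':
    print(hello(event={}, context=None))
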
def test_main(self):
    import data
    data.main()

graph = model()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    sess.run(tf.global_variables_initializer())
    # use tf.summary.FileWriter (tf.train.SummaryWriter is the deprecated name)
    summary_writer = tf.summary.FileWriter(FLAGS.log_dir, graph=sess.graph)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    output = graph.pred.eval()
    assert output.shape == (FLAGS.batch_size, data.num_classes)
    print('dataflow OK')

    coord.request_stop()
    coord.join(threads)


if __name__ == '__main__':
    print('Running model.py')
    print('\nParameters:')
    for attr, value in sorted(FLAGS.__flags.items()):
        print('{} =\t{}'.format(attr.upper(), value))
    print('')

    print('checking DATA_DIR')
    if os.path.exists(FLAGS.data_dir):
        print('0. Found DATA_DIR')
    else:
        data.main()

    tf.app.run()

# optional GPU memory configuration (disabled):
'''
import tensorflow as tf
import keras.backend.tensorflow_backend
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.55)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)
'''
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model, model_from_json
from data import main, generate_data
import matplotlib.pyplot as plt

#Read data (cf is the project's config module, imported elsewhere)
train_id, train_label_c, train_label_a, valid_id, valid_label_c, valid_label_a = main()

train = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                      augmentation=True,
                      shuffle=True,
                      batch_size=10,
                      file_list=train_id,
                      label_1=train_label_c,
                      label_2=train_label_a)

validation = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                           augmentation=False,
                           shuffle=True,
                           batch_size=10,
                           file_list=valid_id,
                           label_1=valid_label_c,
                           label_2=valid_label_a)

import tensorflow as tf
import keras.backend.tensorflow_backend

# cap per-process GPU memory usage at 65%
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.65)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)

from keras.layers import Input, Dense
from keras.models import Model, model_from_json
from data import main, generate_data
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#Read data (cf is the project's config module, imported elsewhere)
train_id, train_label_c, valid_id, valid_label_c, test_id, test_label_c = main()

train = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                      augmentation=True,
                      shuffle=True,
                      batch_size=10,
                      file_list=train_id,
                      label_1=train_label_c)

validation = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                           augmentation=False,
                           shuffle=True,
                           batch_size=10,
                           file_list=valid_id,
                           label_1=valid_label_c)

# -*- coding: utf-8 -*-
import os

import Merge_data_set
import data
import class_cut
from loguru import logger

logger.add("output.log", backtrace=True, diagnose=True)

try:
    # path to the parent directory that holds the top-level data folder
    filepath = r"C:/Users\JSKJ\Desktop\shenhe"
    files = os.listdir(filepath)
    for name in files:
        Merge_data_set.main(filepath, name)
        data.main(filepath, name)
        class_cut.main(filepath, name)
    print("done")
except Exception:
    logger.exception('error')

'''
Sandbox for testing cluster partition resolution methods.
'''
from data import main
from termination_criterion import cluster_evaluation
from sklearn.neighbors import BallTree
from identify_centroid import centroid, determine_radius
from tqdm import tqdm
import numpy as np

all_latent, low_d, labels = main()

# centroids = [np.mean(low_d[labels == l], axis=0) for l in range(2)]
dists = [low_d[labels == l] for l in np.unique(labels)]
dense_centroids = np.array([centroid(d, BallTree(d))[2] for d in dists])

print(f'Two? {cluster_evaluation(low_d, labels, dense_centroids)}')

# ones = low_d[labels == 0]
# tree = BallTree(ones)
# one_points, radius, proposal = centroid(ones, tree)

# def approx_equal(one: np.ndarray, two: np.ndarray) -> bool:
#     '''Are the two arrays approximately equal?'''
#     return (one - two < 1).all()

import tensorflow as tf
import keras.backend.tensorflow_backend

# cap per-process GPU memory usage at 65%
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.65)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)

import numpy as np
from data import main, generate_data
from sklearn.metrics import roc_curve, auc, confusion_matrix
from keras.models import model_from_json
import matplotlib.pyplot as plt
import itertools
from scipy import stats

#Read data (cf is the project's config module, imported elsewhere)
train_id, train_label_c, train_label_a, train_mask, \
    valid_id, valid_label_c, valid_label_a, valid_mask, \
    test_id, test_label_c, test_label_a, test_mask = main()

test = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                     augmentation=False,
                     shuffle=False,
                     batch_size=10,
                     file_list=test_id,
                     label_1=test_label_c,
                     label_2=test_label_a,
                     mask=test_mask)

#Load model
json_file = open(cf.DATA_CONFIG['project_folder'] + 'weights/multi.json', 'r')
model_json = json_file.read()
json_file.close()
load_model = model_from_json(model_json)

#Load weights into new model
load_model.load_weights(cf.DATA_CONFIG['project_folder'] + 'weights/multi.h5')
print("Loaded model from disk")


def mse(y_true, y_pred):
    mask = []

from youtube_statistics import YTstats
from data import main

API_KEY = input("Enter API Key: ")
channel_id = input("Enter the channel ID: ")
print("Please wait...")

yt = YTstats(API_KEY, channel_id)
yt.get_channel_statistics()
yt.get_channel_video_data()
file_name = yt.dump()
main(file_name)

def main(start_map):
    ms = data.states[sl.MAP_SELECTION]
    ms.unlock_map(start_map)
    persist = {pl.restart: True, pl.selected_map: start_map}
    data.main("MENU", persist=persist)

def main():
    convert.main()
    data.main()
    initiateModel.main()

        data['size'] = each
        for index, result in enumerate(results):
            data[models[index]] = result
        curve.append(data)
    return curve


if __name__ == '__main__':
    train_x, train_y, test_x, test_y = data.main('winequality-white.csv')

    classifiers = train(train_x, train_y)
    results = test(classifiers, test_x, test_y)
    print(results)

    #print(nn.tune(train_x, train_y, test_x, test_y, 1, 4))
    # print(learning_curve(train_x, train_y, test_x, test_y))

import os
import shutil
import subprocess
import configparser

import data  # local data-prep module (assumed importable as 'data')


def main(expdir, recipe, computing):
    """main method"""

    if recipe is None:
        raise Exception('no recipe specified. Command usage: '
                        'nabu data --recipe=/path/to/recipe')
    if not os.path.isdir(recipe):
        raise Exception('cannot find recipe %s' % recipe)
    if expdir is None:
        raise Exception('no expdir specified. Command usage: '
                        'nabu data --expdir=/path/to/expdir '
                        '--recipe=/path/to/recipe')
    if computing not in ['standard', 'condor']:
        raise Exception('unknown computing mode: %s' % computing)

    # read the data conf file
    parsed_cfg = configparser.ConfigParser()
    parsed_cfg.read(os.path.join(recipe, 'database.conf'))
    cfg_sections = parsed_cfg.sections()

    # check which parameters are defined globally for the database
    if 'globalvars' in cfg_sections:
        globaldataconf = dict(parsed_cfg.items('globalvars'))
        cfg_sections.remove('globalvars')

    # loop over the sections in the data config
    for name in cfg_sections:
        print('processing %s' % name)

        # read the section
        conf = dict(parsed_cfg.items(name))

        if conf['preprocess'] == 'True':
            # create the expdir for this section
            if not os.path.isdir(os.path.join(expdir, name)):
                os.makedirs(os.path.join(expdir, name))

            # create the database configuration
            dataconf = configparser.ConfigParser()
            dataconf.add_section(name)
            for item in conf:
                if conf[item] == 'globalvars':
                    # value deferred to the [globalvars] section
                    dataconf.set(name, item, globaldataconf[item])
                else:
                    dataconf.set(name, item, conf[item])
            with open(os.path.join(expdir, name, 'database.cfg'), 'w') as fid:
                dataconf.write(fid)

            # copy the processor config
            shutil.copyfile(
                conf['processor_config'],
                os.path.join(expdir, name, 'processor.cfg'))

            if computing == 'condor':
                # submit the data preparation to the HTCondor cluster
                if not os.path.isdir(os.path.join(expdir, name, 'outputs')):
                    os.makedirs(os.path.join(expdir, name, 'outputs'))
                subprocess.call(
                    ['condor_submit',
                     'expdir=%s' % os.path.join(expdir, name),
                     'nabu/computing/condor/dataprep.job'])
            else:
                # run the data preparation locally
                data.main(os.path.join(expdir, name))
        else:
            print('Did not require storage.')

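# A minimal sketch of the database.conf layout this function expects, based
# on the keys read above (the 'trainfeatures' section name and the paths
# are hypothetical; a value of 'globalvars' defers to the [globalvars]
# section for that key):
#
#   [globalvars]
#   processor_config = config/feature_processor.cfg
#
#   [trainfeatures]
#   preprocess = True
#   processor_config = globalvars
#   dir = /path/to/store/features
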
        fpr, tpr, threshold = metrics.roc_curve(test_y, probs, pos_label=2)
        roc_auc = metrics.auc(fpr, tpr)
        print(labels[jindex], roc_auc)
        plt.plot(fpr, tpr, colors[jindex], label=labels[jindex])

    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


if __name__ == '__main__':
    train_x, train_y, test_x, test_y = data.main('adult_data.csv')

    ###### uncomment these lines to perform the main analysis ######
    classifiers = train(train_x, train_y)
    results = test(classifiers, test_x, test_y)
    print(results)

    ##### uncomment this line to tune the models that require tuning #####
    # tune(train_x, train_y, test_x, test_y)

    ##### uncomment this line to generate the ROC curve #####
    # roc(classifiers, test_x, test_y)