def main(expdir, recipe, computing):
    '''main method'''

    if recipe is None:
        raise Exception('no recipe specified. Command usage: '
                        'nabu data --recipe=/path/to/recipe')
    if not os.path.isdir(recipe):
        raise Exception('cannot find recipe %s' % recipe)
    if expdir is None:
        raise Exception('no expdir specified. Command usage: '
                        'nabu data --expdir=/path/to/expdir '
                        '--recipe=/path/to/recipe')
    if computing not in ['standard', 'condor']:
        raise Exception('unknown computing mode: %s' % computing)

    #read the data conf file
    parsed_cfg = configparser.ConfigParser()
    parsed_cfg.read(os.path.join(recipe, 'database.conf'))

    #loop over the sections in the data config
    for name in parsed_cfg.sections():

        print('processing %s' % name)

        #read the section
        conf = dict(parsed_cfg.items(name))

        if not os.path.exists(conf['dir']):
            os.makedirs(conf['dir'])
        else:
            print('%s already exists, skipping this section' % conf['dir'])
            continue

        #create the expdir for this section
        if not os.path.isdir(os.path.join(expdir, name)):
            os.makedirs(os.path.join(expdir, name))

        #create the database configuration
        dataconf = configparser.ConfigParser()
        dataconf.add_section(name)
        for item in conf:
            dataconf.set(name, item, conf[item])

        with open(os.path.join(expdir, name, 'database.conf'), 'w') as fid:
            dataconf.write(fid)

        #copy the processor config
        shutil.copyfile(
            conf['processor_config'],
            os.path.join(expdir, name, 'processor.cfg'))

        if computing == 'condor':
            if not os.path.isdir(os.path.join(expdir, name, 'outputs')):
                os.makedirs(os.path.join(expdir, name, 'outputs'))
            subprocess.call(['condor_submit',
                             'expdir=%s' % os.path.join(expdir, name),
                             'nabu/computing/condor/dataprep.job'])
        else:
            data.main(os.path.join(expdir, name))
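For context, the loop above relies on only two keys in each section of the recipe's database.conf: 'dir' (where the prepared data is written) and 'processor_config' (the processor configuration copied into the expdir). The snippet below is a minimal, hypothetical sketch of how such a file could be generated; the section names and paths are invented for illustration and are not part of any real recipe.

# Hypothetical sketch only: writes a database.conf containing the two keys the
# loop above actually reads ('dir' and 'processor_config'). Section names and
# paths are made up for illustration.
import configparser

cfg = configparser.ConfigParser()
for section, feat_dir in [('trainspec', '/tmp/features/train'),
                          ('devspec', '/tmp/features/dev')]:
    cfg.add_section(section)
    cfg.set(section, 'dir', feat_dir)
    cfg.set(section, 'processor_config', '/path/to/recipe/processor.cfg')

with open('/path/to/recipe/database.conf', 'w') as fid:
    cfg.write(fid)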
def main():
    # EDA
    da.main()

    st.header("SVM_RBF Predictor")

    # Predictor
    ma.main()
Example #3
def main():
    data.main()
    create_unifo_table()
    content_list = data.content
    url_list = data.url_list
    for i in range(len(content_list)):
        insert_data(content_list[i][0], content_list[i][1], content_list[i][2])

    create_url_table()
    for i in range(len(url_list)):
        insert_url(url_list[i])
Example #4
File: utils.py  Project: eventh/tdt4215
def main(script):
    """Run all the functions in this module."""
    data.main()  # Populate all objects

    # Generate a LaTeX table with all stopwords
    _generate_columned_table(sorted(data.get_stopwords()),
                             6, 'stopwords', 'Norwegian stopwords')

    # Generate a LaTeX table with all medical terms
    _generate_columned_table(sorted(data.get_medical_terms()),
                             3, 'medicalterms', 'Medical terms')

    generate_cases_table()
    calculate_chapter_statistics()
    calculate_case_statistics()
Example #5
def main():
    try:
        credentials = tls.get_credentials_file()
    except:
        ## credentials file not found: prompt the user to enter them
        credentials = {}
        credentials['username'] = raw_input("Plotly Username: ")
        credentials['api_key'] = raw_input("api key: ")
    py.sign_in(credentials['username'], credentials['api_key'])
    survey_file = "survey.csv"
    run_data = d.main()
    for runner in run_data.runners:
        runner.make_data()
        print(runner.median)
        #print runner.num , runner.total , runner.count, runner.avg, runner.dur, runner.mpd, runner.rpd
    
    INDEX = completeSurvey()
    #SD is a SurveyData object, has all of the respondents
    SD = read_survey(survey_file, run_data, INDEX)
    mydict = SD.makeDictionary()
    SD.groupSocial()
    SD.groupStarter()
    SD.groupQ1()
    SD.groupQ2()

    #list of runners that did not respond
    nonResponders = sort(run_data, SD)
    #list of runners that did respond
    surveyResponders = SD.responses
    plotQ1(SD)
    #plotQ2(SD)
    #starters(SD)
    plotSocial(SD)
Example #6
def main():
    import data

    data = data.main()

    models = {c: MainClauseModel() for c in data}

    for c, m in models.items():
        print(c, '\n')
        m.fit(data[c])
        print('\n')

        break

    return data, models
def main():
    import data

    data = data.main()

    exp = MainClauseExperiment(data)
    exp.run()

    verbreps, projection = exp.results

    verbs = [
        'want', 'see', 'know', 'think', 'say', 'like', 'tell', 'try', 'need',
        'remember'
    ]

    verbreps[verbreps.verb.isin(verbs)].to_csv(
        '../bin/results/verbreps_results.csv', index=False)
    projection.to_csv('../bin/results/projection_results.csv', index=False)

    return exp
def hello(event, context):
    # Data Module
    df = data.main()

    # Save to S3
    bucket = 'covid-19-data-etl-timothygithinji'

    # Convert dataframe to CSV
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, encoding='utf-8', header=False, index=False)

    # S3 Client
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket, 'data/data.csv').put(Body=csv_buffer.getvalue())

    # SNS Client
    sns = boto3.client('sns')
    topic_arn = 'arn:aws:sns:us-east-1:144272576793:covid-19-data-etl'
    message = 'New data saved to S3 bucket'
    sns.publish(TopicArn=topic_arn, Message=message)

    return {"message": "New data saved to S3 bucket"}
Example #9
File: test_data.py  Project: so07/snap
    def test_main(self):
        import data
        data.main()
Example #10
    graph = model()
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir,
                                                graph=sess.graph)

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        output = graph.pred.eval()
        assert (output.shape == (FLAGS.batch_size, data.num_classes))
        print('dataflow OK')

        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':

    print('Running model.py')
    print('\nParameters:')
    for attr, value in sorted(FLAGS.__flags.items()):
        print('{} =\t{}'.format(attr.upper(), value))
    print('')
    print('checking DATA_DIR')
    if os.path.exists(FLAGS.data_dir):
        print('0. Found DATA_DIR')
    else:
        data.main()
    tf.app.run()
Example #11
'''
import tensorflow as tf
import keras.backend.tensorflow_backend
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.55)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)
'''

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from data import main, generate_data
from keras.models import model_from_json
import matplotlib.pyplot as plt

#Read data
train_id, train_label_c, train_label_a, valid_id, valid_label_c, valid_label_a = main()

train = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                      augmentation=True,
                      shuffle=True,
                      batch_size=10,
                      file_list=train_id,
                      label_1=train_label_c,
                      label_2=train_label_a)

validation = generate_data(directory=cf.DATA_CONFIG['data_folder'] +
                           'image_data/',
                           augmentation=False,
                           shuffle=True,
                           batch_size=10,
                           file_list=valid_id,
Example #12
import tensorflow as tf
import keras.backend.tensorflow_backend
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.65)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)

from keras.layers import Input, Dense
from keras.models import Model
from data import main, generate_data
from keras.models import model_from_json

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#Read data
train_id, train_label_c, valid_id, valid_label_c, test_id, test_label_c = main()

train = generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/',
                      augmentation=True,
                      shuffle=True,
                      batch_size=10,
                      file_list=train_id,
                      label_1=train_label_c)

validation = generate_data(directory=cf.DATA_CONFIG['data_folder'] +
                           'image_data/',
                           augmentation=False,
                           shuffle=True,
                           batch_size=10,
                           file_list=valid_id,
                           label_1=valid_label_c)
Example #13
# -*- coding: utf-8 -*-
import Merge_data_set
import data
import class_cut
import os
from loguru import logger
logger.add("output.log", backtrace=True, diagnose=True)
try:
    filepath = r"C:/Users\JSKJ\Desktop\shenhe"  #总文件夹所在的上级目录的路径
    files = os.listdir(filepath)
    for name in files:
        Merge_data_set.main(filepath, name)
        data.main(filepath, name)
        class_cut.main(filepath, name)
    print("完成")
except:
    logger.exception('error')
Example #14
'''
    Sandbox for testing cluster partition resolution methods.
'''


from data import main
from termination_criterion import cluster_evaluation

from sklearn.neighbors import BallTree
from identify_centroid import centroid, determine_radius
from tqdm import tqdm
import numpy as np


all_latent, low_d, labels = main()
# centroids = [np.mean(low_d[labels == l], axis=0) for l in range(2)]

dists = [low_d[labels == l] for l in np.unique(labels)]
dense_centroids = np.array([centroid(d, BallTree(d))[2] for d in dists])

print(f'Two? {cluster_evaluation(low_d, labels, dense_centroids)}')

# ones = low_d[labels == 0]
# tree = BallTree(ones)
# one_points, radius, proposal = centroid(ones, tree)


# def approx_equal(one: np.ndarray, two: np.ndarray) -> bool:
#     '''Are the two arrays approximately equal?'''
#     return (one - two < 1).all()
Example #15
import tensorflow as tf
import keras.backend.tensorflow_backend
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.65)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
keras.backend.tensorflow_backend.set_session(session)

import numpy as np
from data import main, generate_data
from sklearn.metrics import roc_curve, auc, confusion_matrix
from keras.models import model_from_json
import matplotlib.pyplot as plt
import itertools
from scipy import stats

#Read data
train_id, train_label_c, train_label_a, train_mask, valid_id, valid_label_c, valid_label_a, valid_mask, test_id, test_label_c, test_label_a, test_mask=main()

test=generate_data(directory=cf.DATA_CONFIG['data_folder'] + 'image_data/', augmentation=False, shuffle=False, batch_size=10, file_list=test_id, label_1=test_label_c, label_2=test_label_a, mask=test_mask)

#Load model
json_file = open(cf.DATA_CONFIG['project_folder'] + 'weights/multi.json', 'r')
model_json = json_file.read()
json_file.close()
load_model = model_from_json(model_json)
#Load weights into new model
load_model.load_weights(cf.DATA_CONFIG['project_folder'] + 'weights/multi.h5')
print("Loaded model from disk")


def mse(y_true, y_pred):
	mask=[]
from youtube_statistics import YTstats
from data import main

API_KEY = input("Enter API Key:")
channel_id = input("Enter the channel ID :")
print("Please wait...")

yt = YTstats(API_KEY, channel_id)
yt.get_channel_statistics()
yt.get_channel_video_data()
file_name = yt.dump()
main(file_name)
Example #17
def main(start_map):
    ms = data.states[sl.MAP_SELECTION]
    ms.unlock_map(start_map)
    persist = {pl.restart: True, pl.selected_map: start_map}
    data.main("MENU", persist=persist)
def main():
    convert.main()
    data.main()
    initiateModel.main()
Example #19
File: main.py  Project: aten2001/CS7641-8
		data['size'] = each 

		for index, result in enumerate(results):
			data[models[index]] = result

		curve.append(data)

	return curve





if __name__ == '__main__':

	train_x, train_y, test_x, test_y  = data.main('winequality-white.csv')
	classifiers = train(train_x, train_y)
	results = test(classifiers, test_x, test_y)
	print(results)

	#print(nn.tune(train_x, train_y, test_x, test_y, 1, 4))

	# print(learning_curve(train_x, train_y, test_x, test_y))







Example #20
def main(expdir, recipe, computing):
	"""main method"""
		
	if recipe is None:
		raise Exception('no recipe specified. Command usage: nabu data --recipe=/path/to/recipe')
	if not os.path.isdir(recipe):
		raise Exception('cannot find recipe %s' % recipe)
	if expdir is None:
		raise Exception(
			'no expdir specified. Command usage: nabu data --expdir=/path/to/expdir --recipe=/path/to/recipe')
	if computing not in ['standard', 'condor']:
		raise Exception('unknown computing mode: %s' % computing)

	# read the data conf file
	parsed_cfg = configparser.ConfigParser()
	parsed_cfg.read(os.path.join(recipe, 'database.conf'))
	cfg_sections = parsed_cfg.sections()
	
	# check which parameters are defined globally for the database
	if 'globalvars' in cfg_sections:
		globaldataconf = dict(parsed_cfg.items('globalvars'))
		cfg_sections.remove('globalvars')
	else:
		# avoid a NameError if no [globalvars] section is present
		globaldataconf = {}

	# loop over the sections in the data config
	for name in cfg_sections:

		print('processing %s' % name)

		# read the section
		conf = dict(parsed_cfg.items(name))

		if conf['preprocess'] == 'True':
			# create the expdir for this section
			if not os.path.isdir(os.path.join(expdir, name)):
				os.makedirs(os.path.join(expdir, name))

			# create the database configuration
			dataconf = configparser.ConfigParser()
			dataconf.add_section(name)
			for item in conf:
				if conf[item] == 'globalvars':
					dataconf.set(name, item, globaldataconf[item])
				else:
					dataconf.set(name, item, conf[item])

			with open(os.path.join(expdir, name, 'database.cfg'), 'w') as fid:
				dataconf.write(fid)

			# copy the processor config
			shutil.copyfile(
				conf['processor_config'],
				os.path.join(expdir, name, 'processor.cfg'))

			if computing == 'condor':
				if not os.path.isdir(os.path.join(expdir, name, 'outputs')):
					os.makedirs(os.path.join(expdir, name, 'outputs'))
				subprocess.call(
					['condor_submit', 'expdir=%s' % os.path.join(expdir, name), 'nabu/computing/condor/dataprep.job'])
			else:
				data.main(os.path.join(expdir, name))

		else:
			print('Did not require storage.')
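
For context, the 'globalvars' mechanism above lets a section defer individual keys to a shared [globalvars] section: any key whose value is the literal string 'globalvars' is resolved from that section before the per-section database.cfg is written. Below is a minimal, hypothetical sketch of that lookup; the section name 'trainspec' and the paths are invented for illustration.

# Hypothetical sketch of the 'globalvars' resolution performed above: keys whose
# value is the literal string 'globalvars' are filled in from the [globalvars]
# section of the recipe's database.conf.
import configparser

parsed_cfg = configparser.ConfigParser()
parsed_cfg.read_string('''
[globalvars]
processor_config = /path/to/recipe/processor.cfg

[trainspec]
dir = /tmp/features/train
preprocess = True
processor_config = globalvars
''')

globaldataconf = dict(parsed_cfg.items('globalvars'))
conf = dict(parsed_cfg.items('trainspec'))
resolved = {key: (globaldataconf[key] if value == 'globalvars' else value)
            for key, value in conf.items()}
print(resolved['processor_config'])  # -> /path/to/recipe/processor.cfg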
Example #21
        fpr, tpr, threshold = metrics.roc_curve(test_y, probs, pos_label=2)
        roc_auc = metrics.auc(fpr, tpr)
        print(labels[jindex], roc_auc)

        plt.plot(fpr, tpr, colors[jindex], label=labels[jindex])
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])

    plt.legend(loc='lower right')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()


if __name__ == '__main__':

    train_x, train_y, test_x, test_y = data.main('adult_data.csv')

    ###### un-comment out these lines to perform the main analysis ######
    classifiers = train(train_x, train_y)
    results = test(classifiers, test_x, test_y)
    print(results)

    ##### un-comment out this line to perform the tuning of those models which require tuning.
    # tune(train_x, train_y, test_x, test_y)

    ##### un-comment out this line to generate the ROC curve #####
    # roc(classifiers, test_x, test_y)