def test_read_csv(self):
    """
    Test procedure for the function read_csv()

    Reads two fixture files from the ``files`` directory next to this test
    module and checks that each comes back as a non-empty list of non-empty
    lists of strings equal to the expected table (FILE1 / FILE2).
    """
    # Access the file relative to this one, not the user's terminal.
    # __file__ is this module's path; __name__ is only its import name and
    # has no directory component, which made the lookup cwd-relative.
    parent = os.path.dirname(os.path.abspath(__file__))

    for filename, expected in (('readcsv1.csv', FILE1), ('readcsv2.csv', FILE2)):
        fpath = os.path.join(parent, 'files', filename)
        table = read_csv(fpath)
        self.assertEqual(type(table), list)
        self.assertTrue(len(table) > 0 and type(table[0]) == list)
        self.assertTrue(len(table[0]) > 0 and type(table[0][0]) == str)
        self.assertEqual(table, expected)

    print("read_csv complete")
def test_002a():
    """Check read_csv against pandas on the indoor-temperature data set.

    The file is loaded with both readers, the four measurement columns are
    coerced to numeric, and Humidity, Temperature and
    Temperature_range (low) are compared value by value and through their
    describe() summaries.
    """
    source_path = 'weather-data/indoor-temperature-1617.csv'
    numeric_columns = (
        "Humidity",
        "Temperature",
        "Temperature_range (low)",
        "Temperature_range (high)",
    )

    rows = read_csv(source_path)
    assert len(rows) == 354
    ours = pd.DataFrame(rows)
    print(ours.head())
    for column in numeric_columns:
        ours[column] = pd.to_numeric(ours[column], errors='coerce')

    # Compare with Pandas CSV reader:
    reference = pd.read_csv(source_path)
    for column in numeric_columns:
        reference[column] = pd.to_numeric(reference[column], errors='coerce')

    # Temperature_range (high) is converted above but not compared.
    for column in numeric_columns[:3]:
        assert all(ours[column] == reference[column])
        assert all(ours[column].describe() == reference[column].describe())
def test_004():
    """Load the rainfall data set, check its length and print "mm" statistics."""
    rows = read_csv('weather-data/rainfall-1617.csv')
    assert len(rows) == 353

    frame = pd.DataFrame(rows)
    print(frame.head())

    rainfall = pd.to_numeric(frame["mm"], errors='coerce')
    frame["mm"] = rainfall
    print(frame["mm"].describe())
def run_MLP(params):
    """
    Build an FCNRunner from config/default.ini and run training if configured.

    :param params: Run parameters. ``params['batch_size']`` is read here as
        the training batch size; the whole object is also passed to
        ``mlp.FCNRunner`` and ``bind_training_dataqueue``.
    :return: The value returned by ``iris_runner.run_training()``, or None
        when the config has no "TRAINING" section.
    """
    config = config_reader.read_config(utils.abs_path_of("config/default.ini"))

    checkpoint_dir = config.get_rel_path("PATHS", "checkpoint_dir")
    if not os.path.isdir(checkpoint_dir):
        utils.mkdir_recursive(checkpoint_dir)

    iris_runner = mlp.FCNRunner(config, params)  # trows, vrows, test_rows, config)

    if "TRAINING" in config:
        with tf.name_scope("train_data"):
            # train_batch_size = config.getint("TRAINING", "batch_size")
            train_batch_size = params['batch_size']
            stratified_task = config.get("TRAINING", "stratified_sampling", fallback="")
            trows = csv_reader.read_csv(
                config.get_rel_path("PATHS", "training_file"),
                train_batch_size, stratified_task, config)

        with tf.name_scope("validation_data"):
            vrows = csv_reader.read_csv(
                config.get_rel_path("PATHS", "validation_file"),
                config.getint("TRAINING", "validation_batch_size"))

        iris_runner.bind_training_dataqueue(trows, params)
        iris_runner.bind_validation_dataqueue(vrows)

    # Test-set binding is currently disabled:
    # if "TEST" in config:
    #     test_path = config.get_rel_path("TEST", "test_file")
    #     with tf.name_scope("test_data"):
    #         test_rows = csv_reader.read_test_csv(test_path, int(config["TEST"]["batch_size"]))
    #     iris_runner.bind_test_dataqueue(test_rows)

    iris_runner.initialize()

    # Initialised up front so the return below cannot hit an unbound name
    # when the config has no "TRAINING" section.
    valid_loss = None
    if "TRAINING" in config:
        valid_loss = iris_runner.run_training()

    # if "TEST" in config:
    #     iris_runner.run_test()

    return valid_loss
def question_body_stream(self):
    """
    Yield the non-empty token results for the rows of questions.csv.

    Rows are read with the "Id" and "Body" columns (honouring
    ``self.subsample`` and ``self.limit``) and handed to ``_process_row``
    in a pool of ``self.processes`` workers. Rows whose result is falsy are
    reported on stdout and skipped.
    """
    # Published as a module-level global; presumably _process_row reads it
    # inside the worker processes -- confirm against _process_row.
    global tokenizer
    tokenizer = self.tokenizer

    csv = read_csv("output/stackoverflow-data/questions.csv",
                   set(["Id", "Body"]),
                   subsample=self.subsample, limit=self.limit)
    pool = multiprocessing.Pool(self.processes)
    try:
        for Id, tokens in pool.imap(_process_row, csv, chunksize=10000):
            if tokens:
                yield tokens
            else:
                # Single-argument print(...) works on Python 2 and 3 alike.
                print("ERROR in row %s" % Id)
    finally:
        # Also runs when the consumer stops iterating early or an error
        # propagates, so worker processes are never left behind.
        pool.terminate()
def read_csv_calculate_stats(csv_file):
    """
    Read a CSV file and return summary statistics for its Humidity column.

    :param csv_file: Path of the CSV document; passed to read_csv with
        header=True.
    :return: Result of ``describe()`` on the Humidity column after it has
        been converted with ``pd.to_numeric(..., errors='coerce')``.
    """
    # Read CSV document:
    print("\nAttempting to read {}:".format(csv_file))
    csv = read_csv(csv_file, header=True)

    # Convert CSV document to Pandas dataframe:
    df_indoor = pd.DataFrame(csv)

    # Calculate basic statistics on Humidity:
    # (the message has no placeholder, so it is printed as a plain literal)
    print("\nCalculate basic statistics for Humidity:")
    df_indoor["Humidity"] = pd.to_numeric(df_indoor["Humidity"], errors='coerce')
    humidity_stats = df_indoor["Humidity"].describe()

    return humidity_stats
def test_001():
    """Check read_csv against pandas on the barometer data set."""
    source_path = 'weather-data/barometer-1617.csv'
    rows = read_csv(source_path)
    assert len(rows) == 355

    # Compare with Pandas CSV reader:
    ours = pd.DataFrame(rows)
    ours["Baro"] = pd.to_numeric(ours["Baro"], errors='coerce')
    reference = pd.read_csv(source_path)
    reference["Baro"] = pd.to_numeric(reference["Baro"], errors='coerce')

    ours_baro, reference_baro = ours["Baro"], reference["Baro"]
    assert all(ours_baro == reference_baro)
    assert all(ours_baro.describe() == reference_baro.describe())
def test_003():
    """Load the outside-temperature data set and print per-column statistics."""
    rows = read_csv('weather-data/outside-temperature-1617.csv')
    assert len(rows) == 355

    frame = pd.DataFrame(rows)
    print(frame.head())

    columns = (
        "Temperature",
        "Temperature_range (low)",
        "Temperature_range (high)",
    )
    # Convert every column first, then report, in the same column order.
    for column in columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')
    for column in columns:
        print(frame[column].describe())
def test_000():
    """Exercise read_csv on the malformed / header-less / padded fixtures.

    Checks the number of rows returned for each fixture with and without
    header handling, and compares the second column of the header-less
    fixture with what pandas reads from the same file.
    """
    malformed = read_csv(
        'weather-data/testcsverror001_indoor-temperature-1617.csv')
    print(malformed)
    assert len(malformed) == 6

    no_header_path = 'weather-data/testnoheader002_indoor-temperature-1617.csv'
    parsed = read_csv(no_header_path, header=True)
    assert len(parsed) == 1

    # Compare with Pandas CSV reader:
    ours = pd.DataFrame(parsed)
    reference = pd.read_csv(no_header_path, error_bad_lines=False)
    ours_label = ours.columns[1]
    reference_label = reference.columns[1]
    ours[ours_label] = pd.to_numeric(ours[ours_label], errors='coerce')
    reference[reference_label] = pd.to_numeric(
        reference[reference_label], errors='coerce')
    print(reference)
    print(ours)
    assert all(ours[ours_label] == reference[reference_label])

    # (fixture path, header flag, expected number of rows), in run order
    remaining_cases = (
        ('weather-data/testnoheader002_indoor-temperature-1617.csv', False, 2),
        ('weather-data/testemptyspaces003_indoor-temperature-1617.csv', True, 1),
        ('weather-data/testemptyspaces003_indoor-temperature-1617.csv', False, 2),
        ('weather-data/testbadheader004_indoor-temperature-161.csv', True, 2),
        ('weather-data/testbadheader004_indoor-temperature-161.csv', False, 2),
    )
    for path, has_header, expected_rows in remaining_cases:
        assert len(read_csv(path, header=has_header)) == expected_rows
def get_Sapp(self):
    """
    Load ``self.file`` and describe its columns.

    Returns a 3-tuple: the list of names taken from the first row, a list
    holding (per column) the distinct values found in the remaining rows in
    first-seen order, and the remaining rows themselves.
    """
    # Get the data from a csv file
    table = cr.read_csv(self.file)

    # First row carries the attribute names
    header = list(table[0])
    records = table[1:]

    # Collect the distinct values of every attribute
    distinct_per_column = []
    for column in range(len(header)):
        seen = []
        for record in records:
            cell = record[column]
            if cell not in seen:
                seen.append(cell)
        distinct_per_column.append(seen)

    return header, distinct_per_column, records
def fillDatabase():
    """
    Store the rows returned by read_csv() through a new Session.

    A row whose 'belongs_to' is neither '99' nor '' is stored as a
    Constituency together with one Vote per entry of its 'parties' list
    (creating the Party first when get_party_by_name finds none). Any other
    row is stored as a State with belongs_to='99'.

    NOTE(review): the source reached review with its indentation lost; the
    nesting of the session.add(d) / session.commit() pair inside the party
    loop is reconstructed -- confirm against the original file.
    """
    data = read_csv()
    session = Session()
    for c in data:
        id = c.get('id')
        name = c.get('name')
        belongs_to = c.get('belongs_to')
        if belongs_to != '99' and belongs_to != '':
            d = Constituency(id=id, name=name, belongs_to=belongs_to)
            for i in c.get('parties'):
                party = get_party_by_name(i.get('name'))
                # NOTE(review): party_id is assigned below but never read;
                # the Vote uses party.id directly.
                party_id = []
                if not party is None:
                    party_id = party.id
                else:
                    # Unknown party: create it and flush so that party.id
                    # is available before the Vote is built.
                    party = Party(name=i.get('name'))
                    session.add(party)
                    session.flush()
                    party_id = party.id
                session.add(d)
                session.commit()
                votes = Vote(
                    party_id=party.id,
                    constituency_id=id,
                    first_provisional_votes=i.get('first').get('provisional'),
                    first_previous_votes=i.get('first').get('previous'),
                    second_provisional_votes=i.get('second').get(
                        'provisional'),
                    second_previous_votes=i.get('second').get('previous'))
                session.add(votes)
                session.commit()
        else:
            d = State(id=id, name=name, belongs_to='99')
            session.add(d)
            session.commit()
#
# Copyright 2017 by InfoMus Lab - DIST - University of Genova, http://www.infomus.org
#
# execfile( 'csv_test.py' )
import csv
from sensor_info import *
from csv_reader import read_csv

# Smoke test: load sculpture.csv through read_csv and dump the result.
t = read_csv('sculpture.csv')

# Single-argument print(...) produces the same output on Python 2 and is
# valid syntax on Python 3, unlike the bare print statement.
print(str(t))
print('Loading infrared sensors info from "file"')
print('End of script')
import sys, os

# Load the default configuration and create the checkpoint directory when
# it does not exist yet.
config = config_reader.read_config(utils.abs_path_of("config/default.ini"))
if not os.path.isdir(config.get_rel_path("PATHS", "checkpoint_dir")):
    utils.mkdir_recursive(config.get_rel_path("PATHS", "checkpoint_dir"))

iris_runner = mlp.FCNRunner(config)  # trows, vrows, test_rows, config)

# Training and validation inputs are only built and bound when the config
# has a "TRAINING" section.
if "TRAINING" in config:
    with tf.name_scope("train_data"):
        train_batch_size = config.getint("TRAINING", "batch_size")
        # Empty string when the option is absent.
        stratified_task = config.get("TRAINING", "stratified_sampling", fallback="")
        trows = csv_reader.read_csv(
            config.get_rel_path("PATHS", "training_file"), train_batch_size,
            stratified_task, config)
    with tf.name_scope("validation_data"):
        vrows = csv_reader.read_csv(
            config.get_rel_path("PATHS", "validation_file"),
            config.getint("TRAINING", "validation_batch_size"))
    iris_runner.bind_training_dataqueue(trows)
    iris_runner.bind_validation_dataqueue(vrows)

# Test input is read when the config has a "TEST" section.
# NOTE(review): test_rows is not used within the visible part of the script.
if "TEST" in config:
    test_path = config.get_rel_path("TEST", "test_file")
    with tf.name_scope("test_data"):
        test_rows = csv_reader.read_csv(test_path, int(config["TEST"]["batch_size"]))
import matplotlib.pyplot as plt

# [x, y] pairs recorded by onclick, in call order.
locations = []


def onclick(event):
    # Record the event's data coordinates, draw a marker there and refresh
    # the figure. Presumably registered as a matplotlib click handler
    # elsewhere -- the registration is not visible here.
    # NOTE(review): indentation was lost in this source; plt.show() is
    # assumed to belong to this function -- confirm.
    x = event.xdata
    y = event.ydata
    plt.scatter(x, y)
    locations.append([x, y])
    plt.show()


# Get all necessary inputs from the user
filename = input("Enter CSV filename: ")
csv_data, columns = read_csv(filename)
additional_n = int(
    input(
        "Your data has {} columns; If you have static data to add enter the number (Enter 0 if you have none): "
        .format(len(columns))))
additional_data = []
print("Enter your static data: ")
for _ in range(additional_n):
    additional_data.append(input("::: "))
# Append the same static values to the end of every row.
csv_data = [(data + additional_data) for data in csv_data]
template_name = input("Enter the Template path: ")
im = plt.imread(template_name)
import csv_reader as cs
import clean_data as cl

if __name__ == "__main__":
    # Load the three data sets, show the first record of each, then convert
    # the string fields of every record to proper Python types in place.
    # print(...) is used throughout: the bare print statement is a syntax
    # error on Python 3 and was inconsistent with the calls below.
    print("Main")
    enrollments = cs.read_csv('data/enrollments.csv')
    daily_engagement = cs.read_csv('data/daily_engagement.csv')
    project_submissions = cs.read_csv('data/project_submissions.csv')

    print(enrollments[0])
    print(daily_engagement[0])
    print(project_submissions[0])

    # CLEAN UP THE DATA TYPE
    for enrollment in enrollments:
        enrollment['cancel_date'] = cl.parse_date(enrollment['cancel_date'])
        enrollment['days_to_cancel'] = cl.parse_maybe_int(enrollment['days_to_cancel'])
        enrollment['is_canceled'] = enrollment['is_canceled'] == 'True'
        enrollment['is_udacity'] = enrollment['is_udacity'] == 'True'
        enrollment['join_date'] = cl.parse_date(enrollment['join_date'])

    for engagement_record in daily_engagement:
        # Counts go through float() before int(), as in the original.
        engagement_record['lessons_completed'] = int(float(engagement_record['lessons_completed']))
        engagement_record['num_courses_visited'] = int(float(engagement_record['num_courses_visited']))
        engagement_record['projects_completed'] = int(float(engagement_record['projects_completed']))
        engagement_record['total_minutes_visited'] = float(engagement_record['total_minutes_visited'])
        engagement_record['utc_date'] = cl.parse_date(engagement_record['utc_date'])

    for submission in project_submissions:
        submission['completion_date'] = cl.parse_date(submission['completion_date'])
from csv_reader import read_csv
import pandas as pd
import time

if __name__ == "__main__":
    # Time read_csv against pandas.read_csv on the same file.
    csvfilepath = 'weather-data/indoor-temperature-1617.csv'

    started = time.time()
    csv = read_csv(csvfilepath)
    own_elapsed = time.time() - started

    assert len(csv) == 354
    df = pd.DataFrame(csv)
    print(df.head())
    for column in (
            "Humidity",
            "Temperature",
            "Temperature_range (low)",
            "Temperature_range (high)",
    ):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Compare with Pandas CSV reader:
    started = time.time()
    df2 = pd.read_csv(csvfilepath)
    pandas_elapsed = time.time() - started

    print("Reading {} lines in {} compared to Pandas reader in {}".format(
        df2.shape[0], own_elapsed, pandas_elapsed))
# importing the required module import csv_reader import dr_harsha_filter import plot_data import moving_average_filter # Defines window size for moving window filter window_size = 100 # Defines file name to read data file_name = 'assets/data_set.csv' # Reads data from the csv file raw_data = csv_reader.read_csv(file_name) # x values of data set x = raw_data[0] # y values of data set y = raw_data[1] # Plots raw data plot_data.plot(x, y, 'Raw Data') # Filters data from Dr. Harasha's filter filtered_data_from_dr_harsha_filter = dr_harsha_filter.filter_by_dr_harsha_filter( y, 0.05) # Plots data recieved from Dr. Harsha's filter plot_data.plot(x, filtered_data_from_dr_harsha_filter, 'Dr. Harsha\'s Filter Data')
import sys from person import Person from person import save_people from csv_reader import read_csv from csv_reader import Relation CSV_FILENAME = sys.argv[1] COMMON_PARENT_NAME = 'NO NAME PARENT' NEW_CHILD_NAME = 'NO NAME PERSON' relationships = read_csv(CSV_FILENAME) people = {} name_to_person = {} next_person_id = 1 RELATION_ORDER = { Relation.FATHER: 0, Relation.MOTHER: 0, Relation.HUSBAND: 0, Relation.WIFE: 0, Relation.SON: 1, Relation.DAUGHTER: 1, Relation.BROTHER: 10, Relation.SISTER: 10, Relation.SONS_WIFE: 20, Relation.DAUGHTERS_HUSBAND: 20, Relation.WIFES_FATHER: 21, Relation.WIFES_MOTHER: 21,
def show_graphs():
    """Read the CSV data, split it per coin, and graph the selected coin.

    The result of ``cr.read_csv()`` is unpacked into
    ``cr.split_data_coins``; the coin returned by ``select_currency`` is
    then passed to ``dg.graph_coin`` together with the split data.
    """
    csv_parts = cr.read_csv()
    coins = cr.split_data_coins(*csv_parts)
    chosen = select_currency(coins)
    dg.graph_coin(coins, chosen)
import sys
from csv_reader import read_csv
import pandas as pd

if __name__ == "__main__":
    # Invokes CSV reader with target CSV file as the first argument
    # Usage:
    #     python csv_cli.py [path_to_csv_file]

    if len(sys.argv) <= 1:
        # No CSV document given: show how to call the script.
        print("Usage: csv_cli.py [path_to_csv_file]")
    else:
        # User chosen CSV document:
        csv_file = sys.argv[1]

        # Read CSV document:
        print("\nAttempting to read {}:".format(csv_file))
        csv = read_csv(csv_file, header=True)

        # Convert CSV document to Pandas dataframe:
        print(
            "\nAttempting to convert to Pandas dataframe {}:".format(csv_file))
        df_indoor = pd.DataFrame(csv)
        print(df_indoor.head())
def model_currency(k=10):
    """
    Creates a neural network to model a currency.

    Trains one model per fold, combines the fold weights into a single
    model (each fold weighted by the inverse of its error), then asks the
    user for a number of steps and predicts revenue for that many steps.

    :param k: The degree of cross-validation to be performed.
    :return: Tuple ``(revenue, error)``: the output of ``m.predict_model``
        and the output of ``m.test_model`` for the combined model.
    """
    coin_dict, data = cr.read_csv()
    data = cr.split_data_coins(coin_dict, data)
    coin = select_currency(data)
    data = data[coin]

    model_weights = []
    model_errors = []
    split_data = cr.split_data(data, k)
    # Keep fields 2, 5, 3, 4 of every entry (in that order) as floats.
    split_data = [[[float(e[2]), float(e[5]), float(e[3]), float(e[4])] for e in s] for s in split_data]

    print("Modeling neural networks with k-fold cross-validation")
    for i in range(k):
        model = m.create_model(4, [8, 8, 2])
        # Everything except fold i is used for training; fold i for testing.
        raw_data = split_data[:i] + split_data[i+1:]
        training_data = np.array([s[:-1] for s in raw_data])
        m.train_model(model, training_data, np.array([to_expected(s) for s in raw_data]))
        error = m.test_model(model, np.array([split_data[i][:-1]]), np.array([to_expected(split_data[i])]))
        model_weights.append(np.array(m.get_weights(model)))
        model_errors.append(error[0])

    # Weight every fold's model by its share of the total inverse error.
    sum_error = sum(1/e for e in model_errors)
    for idx, error in enumerate(model_errors):
        proportion = (1/error)/sum_error
        model_weights[idx] = proportion * model_weights[idx]
    true_weights = sum(model_weights)
    true_model = m.create_model(4, [8, 8, 2])
    m.set_weights(true_model, true_weights)

    # Keep asking until a positive whole number is entered. The previous
    # `except ValueError or AssertionError` only caught ValueError, and the
    # loop was left even after invalid input, so a bad value could reach
    # predict_model.
    while True:
        print("For how long would you like to invest?")
        try:
            steps = int(input("Choice: "))
        except ValueError:
            steps = None
        if steps is not None and steps > 0:
            break
        print("That was not a valid amount of time.")

    revenue = m.predict_model(true_model, np.array([[split_data[-1][-1]]]), steps)
    error = m.test_model(true_model, np.array([s[:-1] for s in split_data]),
                         np.array([to_expected(s) for s in split_data]))

    # Multiply the two predicted factors together across all steps.
    multiply = [1, 1]
    for r in revenue:
        multiply[0] *= r[0][0]
        multiply[1] *= r[0][1]
    print("Expected revenue: {} with error percentage at: {}%".format(multiply, error[0]*100))
    return revenue, error
import os
import csv_reader
import summarizer
import intermediate

# File names, all resolved against the current working directory.
input_filename = 'Piano-Hero-1_Overall_Fudged_Testing_Data.csv'
output_filename = 'Piano-Hero-1-Data-Summary.csv'
intermediate_output_filename = 'Piano-Hero-1-Intermediate-Output.csv'

working_dir = os.getcwd()
input_filepath = os.path.join(working_dir, input_filename)
output_filepath = os.path.join(working_dir, output_filename)
intermediate_output_filepath = os.path.join(working_dir,
                                            intermediate_output_filename)

# Read the input once, then feed the same data to both writers.
intermediate_data_dict = csv_reader.read_csv(input_filepath)
summarizer.data_summary(intermediate_data_dict, output_filepath)
intermediate.intermediate_data(intermediate_data_dict,
                               intermediate_output_filepath)
import cv2
import csv_reader as csvr
import email_sender as esender
import os

# Script Settings
isDebug = True  # Setting this to True prevents the script from sending the actual email to the intended recipients
attendee_list = csvr.read_csv("src/csv/ListOfAttendees-Test.csv")
template_path = "src/template/globalaibootcamp-template.png"  # Specify the template image path
imageWxH = (1360, 1024)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 2
fontColor = (0,0,0)  # Black
lineType = 2
certificate_folder = "certificate"

# Email Details
emailSubject = "INSERT EVENT NAME"


def generate_email_body(name):
    """Return the email body: a fixed notice, a greeting with *name*, and a placeholder text, joined with <br> tags."""
    return ("***This email and attachment is automatically generated using a script. *** <br><br>"
            "Hi " + name + ", <br><br>"
            "INSERT YOUR EMAIL BODY HERE"
            )


def send_certificate():
    """
    Iterate through the attendee list from the CSV file,
    insert the attendee name on the center of the certificate image,
    then open the Outlook application and send the email, including the
    certificate attachment, to the recipients.
    """
    # NOTE(review): no statements follow the docstring in the visible
    # source; the body appears to be cut off here.
parser = argparse.ArgumentParser(description='Send CSV over to API') parser.add_argument('--csv', help='csv data file path', required=False, default='data.csv') parser.add_argument('--json', help='data template file path', required=False, default='template.json') parser.add_argument('--config', help='Config file path', required=False, default='config.yml') return parser.parse_args() if __name__ == '__main__': cli_args = parse_cli() config = get_config(cli_args) data = csv_reader.read_csv(cli_args.csv) template = template_reader.json_reader('template.json') multi_json_to_post = merge(template, data) for json_to_post in multi_json_to_post: res = api.post('http://mockbin.com/request', '', '', json_to_post) logger.info('Results %s', json.dumps(res.json(), indent=4, sort_keys=True))