def valid_gps_file(gps_file):
    """
    Check if the content of the given gps file is valid.

    Parameters
    ----------
    gps_file : str
        The path of the gps file

    Returns
    -------
    True if the content of the file is valid; False, otherwise.
    """
    if not os.path.isfile(gps_file):
        return False
    try:
        time_speed = utils.read_csv_file(gps_file, columns=[1, 4])  # time, speed
        ave_time = (time_speed[-1][0] - time_speed[0][0]) / 1000.0 / len(time_speed)
        if ave_time > 5:  # TODO: might need to be adjusted
            print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
            return False
    except Exception:
        return False
    return True
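# A minimal sketch of the CSV helper these GPS checks assume (an assumption,
# not the original utils module): read only the requested column indices and
# return them as rows of floats, e.g. columns=[1, 4] -> [[timestamp_ms, speed], ...].
import csv


def read_csv_file(path, columns=None):
    rows = []
    with open(path, newline='') as fp:
        for record in csv.reader(fp):
            if columns is None:
                rows.append(record)
            else:
                rows.append([float(record[c]) for c in columns])
    return rows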
def get_gravity_component(trip):
    """
    Get the gravity component from the accelerometer readings.
    Either use a low-pass filter to get the constant component for each axis,
    or find the stationary periods and take the 3 values at the same time.

    Parameters
    ----------
    trip : str
        Folder path

    Returns
    -------
    gravity component : 1-D array of length 3
        The constant gravity component on the x, y and z axes.
    """
    if debug:
        print('get gravity component')
    # TODO: should we save the parameters to file?
    # so that we don't need to recalculate?
    # in the end, saving the full calibration parameter should be enough
    acc_file = os.path.join(trip, constants.ACC_FILE_NAME)
    acc = utils.read_csv_file(acc_file, columns=[1, 3, 4, 5])
    gravity_component = get_gravity_from_acc(acc)
    return gravity_component
def get_calibration_parameters(trip, require_obd, overwrite=False):
    """
    Get the calibration parameters for a single trip, and save them into a file
    under the current folder.

    Parameters
    ----------
    trip : str
        Folder path
    require_obd : boolean
        If True, then the OBD file will be needed for calibration
    overwrite : boolean, default=False
        If True, overwrite the existing calibration parameter file.
        If False and the calibration parameter file already exists,
        then read the parameters from file and return.

    Returns
    -------
    calibration parameters : 1D array [1*9], i.e. [Ix, Iy, Iz, Jx, Jy, Jz, Kx, Ky, Kz]
    """
    if debug:
        print("get calibration parameters for trip: %s" % trip)
    if not overwrite:
        calib_file = os.path.join(trip, constants.CALIBRATION_FILE_NAME)
        if os.path.isfile(calib_file):
            if debug:
                print("%s already exists. And overwrite is set to be %s. Skip." % (calib_file, overwrite))
            with open(calib_file, 'r') as fp:
                line = fp.readline()
            parameters = line.rstrip().split(',')
            calibration_parameters = [float(p) for p in parameters]
            return calibration_parameters
    acc_file = os.path.join(trip, constants.ACC_FILE_NAME)
    acc = utils.read_csv_file(acc_file, columns=[1, 3, 4, 5])  # get numpy array
    gravity_component = get_gravity_from_acc(acc)
    acc_wt_gravity = remove_gravity_component(acc, gravity_component)
    j = get_j(trip, acc_wt_gravity, gravity_component, require_obd=require_obd)
    gravity_component_norm = norm_vector(gravity_component)
    j_norm = norm_vector(j)
    i = np.cross(j_norm, gravity_component_norm)
    ans = i.tolist()
    ans.extend(j_norm)
    ans.extend(gravity_component_norm)
    output_file = os.path.join(trip, constants.CALIBRATION_FILE_NAME)
    with open(output_file, 'w') as fp:
        fp.write(','.join([str(a) for a in ans]))
    return ans
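# A minimal usage sketch (hypothetical trip folders, not from the original):
# the returned 9-element list is the flattened rotation, i.e. the I, J and K
# axes of the vehicle frame expressed in phone coordinates.
for trip in ["/data/trips/trip_001", "/data/trips/trip_002"]:
    params = get_calibration_parameters(trip, require_obd=True)
    i_axis, j_axis, k_axis = params[0:3], params[3:6], params[6:9]
    print(trip, i_axis, j_axis, k_axis)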
def __init__(self, filedir: str, filename: str, categorical_columns: List):
    full_file_name = os.path.join(filedir, filename)
    self.data = read_csv_file(full_file_name)
    self.categorical_columns = categorical_columns
    self.continuous_columns = [
        d for d in self.data if d not in categorical_columns
    ]
    self.levels = self._get_levels()
    self.filter_category = self.levels > 0
    self.filter_continuous = self.levels == 0
    self.data = self.data.to_numpy()
def main():
    """
    Reads all event_data files, concatenates them into one dataframe and
    writes the result to a file.
    """
    print("Reading event_data files...")
    event_data_new = pd.concat([
        utils.read_csv_file(file)
        for file in utils.get_csv_from_dir("event_data")
    ])
    print("Writing event_data_new file...")
    event_data_new.to_csv("event_data_new.csv", index=False)
    print("Done.")
def main():
    preparer = PreparerQuery1()
    # print(os.getcwd())
    values = read_csv_file("../data/event_data_new.csv")
    print(values.to_string())  # call to_string(); the original printed the bound method itself
    isdf = isinstance(values, pd.DataFrame)
    # print(isdf)
    print("----------------------")
    pp_values = preparer.prepare(values)
    print(pp_values.to_string())
    print("----------------------*****")
    # print(values["artist"])
    print(preparer.prepare(values)["artist_name"])
def valid_gps(root, gps_max_interval, min_duration):
    """
    Check whether the gps file is valid.

    Parameters
    ----------
    root : str
        The folder containing the gps file.
    gps_max_interval : int
        The maximum sampling interval of a good trip.
    min_duration : int
        The minimum duration of a good trip. Unit is minutes.

    Returns
    -------
    valid : bool
        True if the gps file is good. False, otherwise.
    """
    gps_file = os.path.join(root, constants.GPS_FILE_NAME)
    if not valid_gps_file(gps_file):
        if debug:
            print("invalid gps file: %s" % root)
        return False
    time_speed = read_csv_file(gps_file, columns=[1, 4])
    trip_duration = (time_speed[-1][0] - time_speed[0][0]) / 1000.0  # seconds
    ave_time = trip_duration / len(time_speed)
    if ave_time > gps_max_interval:
        if debug:
            print("Trip: %s" % root)
            print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
        return False
    if trip_duration / 60.0 < min_duration:
        if debug:
            print("Trip: %s" % root)
            print("Trip is too short: %.2f minutes." % (trip_duration / 60.0))
        return False
    if debug:
        print("Trip: %s" % root)
        print("\tAverage interval of GPS samples: %.2f seconds, which is good." % ave_time)
        print("\tTrip length is: %.2f minutes." % (trip_duration / 60.0))
    return True
def load_concepts(file_path, concept1, cnx):
    concepts_read = 0
    concepts_inserted = 0
    concepts_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        concept = concepts_file_parser.get_concepts(line)
        concepts_read += 1
        try:
            if (len(concept['pxordx']) != 0 and  # check that these columns have no null values
                    len(concept['codetype']) != 0 and
                    len(concept['vocabulary_id']) != 0 and
                    len(concept['domain_id']) != 0 and
                    len(concept['code']) != 0):
                # Add new concept to dictionary.
                # Since no single column holds unique values, we build our own key by
                # combining two attributes: 'concept_id' is almost unique but has empty
                # values, so we concatenate it with 'code'.
                ref1 = concept['concept_id']
                if ref1 is not None:
                    concepts_ref = concept['code'] + ref1
                else:
                    concepts_ref = concept['code']
                if concepts_ref not in concept1:
                    id = database.add_concepts(concept, cnx)
                    concept1[concepts_ref] = id  # record the new id in the lookup dictionary
                    logger.info("Inserting concept code {0} in database.".format(concepts_ref))
                    concepts_inserted += 1
                else:
                    logger.info("concept code {0} already exists in database.".format(concepts_ref))
            else:
                message = "Error in row: %d, missing fields to create new concept." % row
                logger.error(message)
                print(message)
                concepts_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            concepts_errors += 1
        row += 1
def load_vocabularies(file_path, vocabularies, cnx):
    vocabulary_read = 0
    vocabulary_inserted = 0
    vocabulary_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        vocabulary = vocabulary_file_parser.get_vocabulary(line)
        vocabulary_read += 1
        try:
            if (len(vocabulary['ref']) != 0 and
                    len(vocabulary['name']) != 0 and
                    len(vocabulary['url']) != 0 and
                    len(vocabulary['version']) != 0 and
                    len(vocabulary['description']) != 0 and
                    len(vocabulary['status']) != 0):
                # Add new vocabulary to dictionary
                vocabulary_ref = vocabulary['ref'].strip()
                if vocabulary_ref not in vocabularies:
                    id = database.add_vocabulary(vocabulary, cnx)
                    vocabularies[vocabulary_ref] = id
                    logger.info("Inserting vocabulary ref {0} in database.".format(vocabulary['ref']))
                    vocabulary_inserted += 1
                else:
                    logger.info("Vocabulary ref {0} already exists in database.".format(vocabulary['ref']))
            else:
                message = "Error in row: %d, missing fields to create new vocabulary." % row
                logger.error(message)
                print(message)
                vocabulary_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            vocabulary_errors += 1
            return False
        row += 1
    return True
def load_concepts(file_path, conceptsDIC, cnx):
    concepts_read = 0
    concepts_inserted = 0
    concepts_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        concepts = concepts_file_parser.get_concepts(line)
        concepts_read += 1
        try:
            if (concepts['pxordx'] is not None and
                    concepts['codetype'] is not None and
                    concepts['concept_id'] is not None and
                    concepts['vocabulary_id'] is not None and
                    concepts['domain_id'] is not None):
                # if (concepts['code']) != None:
                # Add new concepts to dictionary
                # code = concepts['code'].strip()
                concept_id = concepts['concept_id'].strip()
                if concept_id not in conceptsDIC:
                    id = database.add_concepts(concepts, cnx)
                    conceptsDIC[concept_id] = id
                    logger.info("Inserting concepts code {0} in database.".format(concepts['concept_id']))
                    concepts_inserted += 1
                else:
                    logger.info("concepts code {0} already exists in database.".format(concepts['concept_id']))
            else:
                message = "Error in row: %d, missing fields to create new concepts." % row
                logger.error(message)
                print(message)
                concepts_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            concepts_errors += 1
            return False
        row += 1
    return True
def sort_by_symmetry(self, order, type, h):
    # order: h - l
    # type: b - m
    # h: 2-3
    if type == 'b':
        nType = 0
    else:
        nType = 1
    if h == '2':
        nH = 0
    else:
        nH = 1
    symmetries = read_csv_file()
    measure = symmetries[nType][nH]
    the_high = reverseSortList(measure)
    if order == 'h':
        return the_high
    else:
        return specialSortList(the_high)
def valid_gps_file(gps_file, max_interval=None):
    """
    Check if the content of the given gps file is valid.

    Parameters
    ----------
    gps_file : str
        The path of the gps file
    max_interval : int, default=None
        The maximum average sample interval, in seconds

    Returns
    -------
    True if the content of the file is valid; False, otherwise.
    """
    if not os.path.isfile(gps_file):
        return False
    try:
        time_speed = utils.read_csv_file(gps_file, columns=[1, 4])  # time, speed
        if len(time_speed) == 0:
            return False
        if max_interval:
            ave_time = (time_speed[-1][0] - time_speed[0][0]) / 1000.0 / len(time_speed)
            if ave_time > max_interval:
                print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
                return False
    except Exception:
        return False
    return True
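# A minimal usage sketch (hypothetical paths and threshold, not from the
# original): keep only trips whose GPS log exists, is non-empty, and is
# sampled at least once every 5 seconds on average.
trips = ["/data/trips/trip_001", "/data/trips/trip_002"]
good_trips = [t for t in trips
              if valid_gps_file(t + "/gps.csv", max_interval=5)]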
import csv
import json
from typing import List, Dict, Callable
import difflib
from operator import itemgetter
import utils
import re
from location import Location

bols = utils.read_csv_file('spreadsheet_data/da-base-OLTP - BillOfLading.csv')
addresses = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Address.csv')
commodities = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - Address.csv')
containers = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Address.csv')
container_models = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - Address.csv')
ports = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Port.csv')
business_entities = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - BusinessEntity.csv')
# countries = utils.read_csv_file('data/ISO 3166 (countries).csv')
# cities = utils.read_csv_file('data/worldcities.csv')
# country_pop = utils.read_csv_file('country-pop-cum.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
def _get_rebalance_event_report_data(self):
    return utils.read_csv_file(
        utils.open_downloaded_file(
            self,
            "Rebalance Event {0}.csv".format(datetime.now().strftime("%d%m%y")),
            "rt"))
def scatter_plot(scatter, name, ylabel, title, xlabel):
    """Makes a plot with the given parameters"""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.scatter(scatter[0], scatter[1], s=0.5)
    plt.plot([0, 1], [0, 1], c='orange', lw=2)
    plt.savefig(RESULTS_DIR + '/' + EXPERIMENT_LABEL + name, dpi=300)
    plt.close()


if __name__ == "__main__":
    # Reads data from shcu_typical_data.csv, takes a sample of the times, and gets data for plotting
    typical_arscl_dataframe = read_csv_file(TYPICAL_DATA_CSV)  # Contains both ARSCL and TSI Data
    typical_arscl_dataframe = typical_arscl_dataframe.dropna(
        subset=['fsc_z', 'cf_tot', 'timestamp_utc'])
    typical_times = load_pickled_file(TYPICAL_VALID_FILE)
    print('after unpickling')
    # print(typical_times)
    typical_times = typical_times[0:N_SAMPLES]
    print('after 0:N_SAMPLES')
    # print(typical_times)
    typical_arscl_dataframe = typical_arscl_dataframe[
        typical_arscl_dataframe['timestamp_utc'].isin(typical_times)]
    typical_arscl_tsi = extract_arscl_and_image_fsc_from_dataframes(
        typical_arscl_dataframe, typical_arscl_dataframe)
    # Reads data from shcu_dubious_data.csv, takes a sample of the times, and gets data for plotting
import csv
from decimal import Decimal, InvalidOperation
from operator import itemgetter
import utils

cities = utils.read_csv_file('../data/worldcities.csv')


def decimal_or_zero(x):
    try:
        return Decimal(x)
    except InvalidOperation:
        if x == '':
            return Decimal('0')
        raise


total_pop = sum(map(decimal_or_zero, map(itemgetter('population'), cities)))
country_pop = {}
with open('country-pop-cum.csv', 'w', encoding='utf-8', newline='') as f:
    keys = ['alpha-2', 'percent', 'cumulative']
    writer = csv.DictWriter(f, keys)
    writer.writeheader()
    for c in cities:
        percent = decimal_or_zero(c['population']) / total_pop
        try:
            country_pop[c['iso2']] += percent
        except KeyError:
def gen_new_map(in_file_name, new_header, out_file_name):
    table_data, table_header = utils.read_csv_file(in_file_name)
    new_data = update_map_csv(table_data, table_header, new_header)
    utils.write_csv_file(out_file_name, new_data, new_header)
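# A minimal usage sketch (hypothetical file names and header, not from the
# original): rewrite an existing map CSV so its columns follow a new header.
new_header = ['id', 'latitude', 'longitude', 'label']
gen_new_map('old_map.csv', new_header, 'new_map.csv')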
embedding_size = 512
vocab_size = 9955
img_dir = '2.jpg'
vocab_dir = 'vocabulary.csv'
res_v1_101_lstm_parameters_dir = 'res_v1_101_lstm.ckpt'

# RGB mean value for images
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Preprocess the data, move to zero mean.
# Keep the placeholder in its own variable so the raw uint8 image is what gets
# fed and the mean subtraction actually runs; the original rebound `image` and
# fed the preprocessed tensor instead.
image_placeholder = tf.placeholder(tf.uint8, [None, None, None, 3])
image = tf.cast(image_placeholder, tf.float32)
image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
input_seqs = tf.constant([1], dtype=tf.int64)
vocab = utils.read_csv_file(vocab_dir)

# Build model and predict output sequences
output_words = model.res_v1_101_lstm(image, input_seqs, None, 1, embedding_size,
                                     vocab_size, False, 0.5)

# Initialization
local_vars_init_op = tf.local_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(local_vars_init_op)
    saver.restore(sess, res_v1_101_lstm_parameters_dir)
    img_ori = cv2.imread(img_dir)
    img = cv2.resize(img_ori, (512, 512))
    sentence = sess.run(output_words, feed_dict={image_placeholder: [img]})
def read_csv_file(file):
    log.debug("reading file {}".format(file))
    return utils.read_csv_file(file)
import pytest

from all_api.topics import Topics
from utils import read_csv_file

index_page_Test_Data = read_csv_file('testdata/data.csv')
post_topic_data = read_csv_file('testdata/post_topic.csv')


@pytest.fixture
def get_topic_id():
    """
    A pytest fixture that can be reused by tests as a basic setup step.
    """
    url = "/topics"
    t = Topics(url)
    r = t.post_create_topic("3333a0fb-6dd8-439e-813b-2c3a5213a154",
                            "11111111111", "ask", "xxxxxxxxxxxxx")
    print(r.json())
    res = r.json()
    topic_id = res['topic_id']
    return topic_id


def test_get_topic_detail(get_topic_id):
    print("get_topic_id===", get_topic_id)
    url = '/topic/' + get_topic_id
    t = Topics(url)
    r = t.get_topic_detail()
    print(r.json())
from utils import read_csv_file
from frogsql import writetable

data = read_csv_file('data/country-and-continent-codes-list-csv_csv.csv')
for row in data:
    try:
        row['Country_Number'] = int(row['Country_Number'])
    except ValueError:
        row['Country_Number'] = None

writetable(data, 'CountryAndContinentCodes',
           'sql_scripts/insert_country_and_continent_codes.sql')
    def find_by_name(self, name):
        return self.find({"name": name})

    def count_by_ids(self, id_list):
        ids = [ObjectId(id) for id in id_list]
        return self.count({"_id": {"$in": ids}})


if __name__ == "__main__":
    import traceback
    import utils
    from config.application import app_context
    from db.mongo.models import NluCategory

    try:
        app_context.init_mongo_client()
        dao = NluCategoryDao(app_context.mongo_client)
        data_list = utils.read_csv_file(
            "/Users/admin/Documents/workspace/eyesmedia-corpus-server/data/nlu",
            "category.csv")
        for data in data_list:
            model = NluCategory()
            model.code = data[0]
            model.name = data[1]
            dao.save_one(model.to_dict())
        # dao = SentenceDao(app_context.mongo_client)
        # logger.info(dao.find_by_ids(["5b767a38efaa657a7929fa95"]))
    except:
        logger.error(traceback.format_exc())
    add_conv2d(model, 256, True)
    add_conv2d(model, 512, True)
    # model.add(... finish defining the rest of your model architecture here ...)
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(32, activation="relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(1))
    model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=0.001))
    model.summary()
    return model


if __name__ == "__main__":
    samples = utils.read_csv_file('data/driving_log.csv')
    samples += utils.read_csv_file("/opt/test/drive1/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive2/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive3/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive4/driving_log.csv")
    samples = samples[1:]  # remove header line

    from sklearn.model_selection import train_test_split
    train_samples, validation_samples = train_test_split(samples, test_size=0.1)

    # for i in generator(train_samples):
    #     print(i[0].shape)
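# A minimal sketch of the batch generator the commented-out loop above assumes
# (an assumption, not the original: rows are taken to follow the usual
# driving_log.csv layout of center,left,right,steering,... with the image path
# in column 0 and the steering angle in column 3).
import cv2
import numpy as np
from sklearn.utils import shuffle


def generator(samples, batch_size=32):
    while True:  # loop forever so Keras can keep drawing batches
        samples = shuffle(samples)
        for offset in range(0, len(samples), batch_size):
            batch = samples[offset:offset + batch_size]
            images, angles = [], []
            for row in batch:
                images.append(cv2.imread(row[0]))  # center camera image
                angles.append(float(row[3]))       # steering angle
            yield np.array(images), np.array(angles)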
import csv
import json
import random
import numpy as np
from typing import List, Dict, Callable, TypeVar, Sequence

import utils

addresses = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
commodities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
containers = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
container_models = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
ports = utils.read_csv_file('../spreadsheet_data/da-base-OLTP - Port.csv')
business_entities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - BusinessEntity.csv')
countries = utils.read_csv_file('../data/ISO 3166 (countries).csv')
cities = utils.read_csv_file('../data/worldcities.csv')
country_pop = utils.read_csv_file('country-pop-cum.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
            dct[key] = callable(dct[key])
def pre_query3(session, filepath):
    # Raw column formatter
    data = read_csv_file(filepath)
    prep = PreparerQuery3()
    valueToInsert = prep.transform(data)
    insert(query_cql.INSERT_TABLE_QUERY3, valueToInsert, session)
def get_num(s: str) -> str:
    m = re.match('[0-9]+', s)
    if m is not None:
        return m[0]
    return ''


def get_zip_code(s: str) -> str:
    m = re.match('[A-Z]*[ -][0-9]+(-[0-9]*)?', s)
    if m is not None:
        return m[0]
    return ''


raw_ports = utils.read_csv_file('spreadsheet_data/raw port data - Sheet1.csv')

with open('output.csv', 'w', encoding='utf-8', newline='') as f:
    keys = list(raw_ports[0].keys()) + ['ZIP Code']
    writer = csv.DictWriter(f, keys)
    writer.writeheader()
    for rp in raw_ports:
        s: str = rp['Address']
        if s != '':
            address = decode_list(s)
            # x = []
            # for i, part in enumerate(address):
def _create_variation_dict(attribute_variations):
    attribute_variations_dict = {}
    for attribute_variation in attribute_variations:
        attribute_variations_dict[attribute_variation] = []
    return attribute_variations_dict


def save_json_to_file(id3_tree, file_path):
    with open(file_path, 'w') as id3_file_path:
        id3_file_path.write(json.dumps(id3_tree))


if __name__ == '__main__':
    training = len(sys.argv) > 1 and sys.argv[1].lower() == 'true'
    headers = read_csv_file('playtennis_headers.txt')[0]
    play_tennis_file_data = read_csv_file('play_tennis.txt')
    if training:
        total_entropy_play_tennis = calculate_total_entropy(
            play_tennis_file_data, JOGA_TENIS, NAO_JOGA_TENIS, DECISION_INDEX)
        id3_tree = train_decision_tree(
            headers, play_tennis_file_data, total_entropy_play_tennis)
        save_json_to_file(id3_tree, 'id3_tree.json')
    else:
        pass
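# A hypothetical counterpart to save_json_to_file (not in the original), so the
# non-training branch could reload the trained tree before classifying:
def load_json_from_file(file_path):
    with open(file_path, 'r') as id3_file:
        return json.load(id3_file)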
import json
import random
from typing import List, Dict, Callable

import utils

business_entities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - BusinessEntity.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
            dct[key] = callable(dct[key])


convert(business_entities, ['Business Entity Key'], int)

with open('output.txt', 'w', encoding='utf-8') as f:
    for entity in business_entities:
        if entity['Business Entity Key'] <= 3000:  # human
            roles = list(
                set(
                    random.choices(['Consignor', 'Consignee'], [3, 5],
                                   k=random.randint(1, 2))))
        elif entity['Business Entity Key'] > 3000:  # business
            roles = list(
                set(
                    random.choices([
def __init__(self, filedir: str, filename: str, original):
    full_file_name = os.path.join(filedir, filename)
    self.data = read_csv_file(full_file_name).to_numpy().ravel()
    self.row_number = original.data.shape[0]
    self._validate_label_data()
    '''
    fields_values['plot_type'] = combo.get()
    print('Combobox atualizado para: ', combo.get())


if __name__ == '__main__':
    # Startup message in the log
    print('Ferramenta para Analise de Datasets')
    # Create the results folder if it does not already exist
    if not datavis.verify_results_folder():
        print('Erro na criacao da pasta. Encerrar.')
    # Ask for the dataframe and read it
    df, success = utils.read_csv_file()
    if not success:
        print(utils.close_program('user'))
        messagebox.showwarning('Erro de importação', utils.close_program('user'))
    else:
        # Data structures with basic information
        plots = [
            'barra', 'histograma', 'linha', 'dispersao', 'boxplot', 'violin'
        ]
        fields_values = dict([('plot_type', ''), ('target_column_x', ''),
                              ('x_name', ''), ('target_column_y', ''),
                              ('y_name', ''), ('title', ''),
                              ('hue_column', ''), ('hue_name', '')])