def valid_gps_file(gps_file):
    """
    Check if the content of the given gps file is valid.

    Parameters
    ----------
    gps_file : str
        The path of the gps file

    Returns
    -------
    True if the content of the file is valid; False, otherwise.
    """
    if not os.path.isfile(gps_file):
        return False
    try:
        time_speed = utils.read_csv_file(gps_file, columns=[1, 4])  # time, speed
        ave_time = (time_speed[-1][0] - time_speed[0][0]) / 1000.0 / len(time_speed)
        if ave_time > 5:  # TODO: might need to be adjusted
            print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
            return False
    except Exception:
        return False
    return True
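# A minimal sketch of the CSV helper these GPS checks assume (an assumption,
# not the original utils module): read only the requested column indices and
# return them as rows of floats, e.g. columns=[1, 4] -> [[timestamp_ms, speed], ...].
import csv


def read_csv_file(path, columns=None):
    rows = []
    with open(path, newline='') as fp:
        for record in csv.reader(fp):
            if columns is None:
                rows.append(record)
            else:
                rows.append([float(record[c]) for c in columns])
    return rows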
def get_gravity_component(trip):
    """
    Get the gravity component from the accelerometer readings.
    Either use a low-pass filter to get the constant component for each axis,
    or find the stationary periods and take the 3 values at the same time.

    Parameters
    ----------
    trip : str
        Folder path

    Returns
    -------
    gravity component : 1-D array of length 3
        The constant gravity component on the x, y and z axes.
    """
    if debug:
        print('get gravity component')
    # TODO: should we save the parameters to file?
    # so that we don't need to recalculate?
    # in the end, saving the full calibration parameter should be enough
    acc_file = os.path.join(trip, constants.ACC_FILE_NAME)
    acc = utils.read_csv_file(acc_file, columns=[1, 3, 4, 5])
    gravity_component = get_gravity_from_acc(acc)
    return gravity_component
def get_calibration_parameters(trip, require_obd, overwrite=False):
    """
    Get the calibration parameters for a single trip, and save them into a file
    under the current folder.

    Parameters
    ----------
    trip : str
        Folder path
    require_obd : boolean
        If True, then the OBD file will be needed for calibration
    overwrite : boolean, default=False
        If True, overwrite the existing calibration parameter file.
        If False and the calibration parameter file already exists,
        then read the parameters from file and return.

    Returns
    -------
    calibration parameters : 1D array [1*9], i.e. [Ix, Iy, Iz, Jx, Jy, Jz, Kx, Ky, Kz]
    """
    if debug:
        print("get calibration parameters for trip: %s" % trip)
    if not overwrite:
        calib_file = os.path.join(trip, constants.CALIBRATION_FILE_NAME)
        if os.path.isfile(calib_file):
            if debug:
                print("%s already exists. And overwrite is set to be %s. Skip." % (calib_file, overwrite))
            with open(calib_file, 'r') as fp:
                line = fp.readline()
            parameters = line.rstrip().split(',')
            calibration_parameters = [float(p) for p in parameters]
            return calibration_parameters
    acc_file = os.path.join(trip, constants.ACC_FILE_NAME)
    acc = utils.read_csv_file(acc_file, columns=[1, 3, 4, 5])  # get numpy array
    gravity_component = get_gravity_from_acc(acc)
    acc_wt_gravity = remove_gravity_component(acc, gravity_component)
    j = get_j(trip, acc_wt_gravity, gravity_component, require_obd=require_obd)
    gravity_component_norm = norm_vector(gravity_component)
    j_norm = norm_vector(j)
    i = np.cross(j_norm, gravity_component_norm)
    ans = i.tolist()
    ans.extend(j_norm)
    ans.extend(gravity_component_norm)
    output_file = os.path.join(trip, constants.CALIBRATION_FILE_NAME)
    with open(output_file, 'w') as fp:
        fp.write(','.join([str(a) for a in ans]))
    return ans
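# A minimal usage sketch (hypothetical trip folders, not from the original):
# the returned 9-element list is the flattened rotation, i.e. the I, J and K
# axes of the vehicle frame expressed in phone coordinates.
for trip in ["/data/trips/trip_001", "/data/trips/trip_002"]:
    params = get_calibration_parameters(trip, require_obd=True)
    i_axis, j_axis, k_axis = params[0:3], params[3:6], params[6:9]
    print(trip, i_axis, j_axis, k_axis)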
def __init__(self, filedir: str, filename: str, categorical_columns: List):
    full_file_name = os.path.join(filedir, filename)
    self.data = read_csv_file(full_file_name)
    self.categorical_columns = categorical_columns
    self.continuous_columns = [
        d for d in self.data if d not in categorical_columns
    ]
    self.levels = self._get_levels()
    self.filter_category = self.levels > 0
    self.filter_continuous = self.levels == 0
    self.data = self.data.to_numpy()
def main():
    """
    Reads all event_data files, concatenates them into one dataframe and
    writes the result to a file.
    """
    print("Reading event_data files...")
    event_data_new = pd.concat([
        utils.read_csv_file(file)
        for file in utils.get_csv_from_dir("event_data")
    ])
    print("Writing event_data_new file...")
    event_data_new.to_csv("event_data_new.csv", index=False)
    print("Done.")
def main():
    preparer = PreparerQuery1()
    # print(os.getcwd())
    values = read_csv_file("../data/event_data_new.csv")
    print(values.to_string())  # call to_string(); the original printed the bound method itself
    isdf = isinstance(values, pd.DataFrame)
    # print(isdf)
    print("----------------------")
    pp_values = preparer.prepare(values)
    print(pp_values.to_string())
    print("----------------------*****")
    # print(values["artist"])
    print(preparer.prepare(values)["artist_name"])
def valid_gps(root, gps_max_interval, min_duration):
    """
    Check whether the gps file is valid.

    Parameters
    ----------
    root : str
        The folder containing the gps file.
    gps_max_interval : int
        The maximum sampling interval of a good trip.
    min_duration : int
        The minimum duration of a good trip. Unit is minutes.

    Returns
    -------
    valid : bool
        True if the gps file is good. False, otherwise.
    """
    gps_file = os.path.join(root, constants.GPS_FILE_NAME)
    if not valid_gps_file(gps_file):
        if debug:
            print("invalid gps file: %s" % root)
        return False
    time_speed = read_csv_file(gps_file, columns=[1, 4])
    trip_duration = (time_speed[-1][0] - time_speed[0][0]) / 1000.0  # seconds
    ave_time = trip_duration / len(time_speed)
    if ave_time > gps_max_interval:
        if debug:
            print("Trip: %s" % root)
            print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
        return False
    if trip_duration / 60.0 < min_duration:
        if debug:
            print("Trip: %s" % root)
            print("Trip is too short: %.2f minutes." % (trip_duration / 60.0))
        return False
    if debug:
        print("Trip: %s" % root)
        print("\tAverage interval of GPS samples: %.2f seconds, which is good." % ave_time)
        print("\tTrip length is: %.2f minutes." % (trip_duration / 60.0))
    return True
def load_concepts(file_path, concept1, cnx):
    concepts_read = 0
    concepts_inserted = 0
    concepts_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        concept = concepts_file_parser.get_concepts(line)
        concepts_read += 1
        try:
            if (len(concept['pxordx']) != 0 and  # check that these columns have no null values
                    len(concept['codetype']) != 0 and
                    len(concept['vocabulary_id']) != 0 and
                    len(concept['domain_id']) != 0 and
                    len(concept['code']) != 0):
                # Add new concept to dictionary.
                # Since no single column holds unique values, we build our own key by
                # combining two attributes: 'concept_id' is almost unique but has empty
                # values, so we concatenate it with 'code'.
                ref1 = concept['concept_id']
                if ref1 is not None:
                    concepts_ref = concept['code'] + ref1
                else:
                    concepts_ref = concept['code']
                if concepts_ref not in concept1:
                    id = database.add_concepts(concept, cnx)
                    concept1[concepts_ref] = id  # record the new id in the lookup dictionary
                    logger.info("Inserting concept code {0} in database.".format(concepts_ref))
                    concepts_inserted += 1
                else:
                    logger.info("concept code {0} already exists in database.".format(concepts_ref))
            else:
                message = "Error in row: %d, missing fields to create new concept." % row
                logger.error(message)
                print(message)
                concepts_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            concepts_errors += 1
        row += 1
def load_vocabularies(file_path, vocabularies, cnx):
    vocabulary_read = 0
    vocabulary_inserted = 0
    vocabulary_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        vocabulary = vocabulary_file_parser.get_vocabulary(line)
        vocabulary_read += 1
        try:
            if (len(vocabulary['ref']) != 0 and
                    len(vocabulary['name']) != 0 and
                    len(vocabulary['url']) != 0 and
                    len(vocabulary['version']) != 0 and
                    len(vocabulary['description']) != 0 and
                    len(vocabulary['status']) != 0):
                # Add new vocabulary to dictionary
                vocabulary_ref = vocabulary['ref'].strip()
                if vocabulary_ref not in vocabularies:
                    id = database.add_vocabulary(vocabulary, cnx)
                    vocabularies[vocabulary_ref] = id
                    logger.info("Inserting vocabulary ref {0} in database.".format(vocabulary['ref']))
                    vocabulary_inserted += 1
                else:
                    logger.info("Vocabulary ref {0} already exists in database.".format(vocabulary['ref']))
            else:
                message = "Error in row: %d, missing fields to create new vocabulary." % row
                logger.error(message)
                print(message)
                vocabulary_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            vocabulary_errors += 1
            return False
        row += 1
    return True
def load_concepts(file_path, conceptsDIC, cnx):
    concepts_read = 0
    concepts_inserted = 0
    concepts_errors = 0
    row = 2
    for line in utils.read_csv_file(file_path, delimiter='\t'):
        concepts = concepts_file_parser.get_concepts(line)
        concepts_read += 1
        try:
            if (concepts['pxordx'] is not None and
                    concepts['codetype'] is not None and
                    concepts['concept_id'] is not None and
                    concepts['vocabulary_id'] is not None and
                    concepts['domain_id'] is not None):
                # if (concepts['code']) != None:
                # Add new concepts to dictionary
                # code = concepts['code'].strip()
                concept_id = concepts['concept_id'].strip()
                if concept_id not in conceptsDIC:
                    id = database.add_concepts(concepts, cnx)
                    conceptsDIC[concept_id] = id
                    logger.info("Inserting concepts code {0} in database.".format(concepts['concept_id']))
                    concepts_inserted += 1
                else:
                    logger.info("concepts code {0} already exists in database.".format(concepts['concept_id']))
            else:
                message = "Error in row: %d, missing fields to create new concepts." % row
                logger.error(message)
                print(message)
                concepts_errors += 1
        except Exception as e:
            message = str(e) + " file: {0} - row: {1}".format(file_path, row)
            logger.error(message)
            print(message)
            concepts_errors += 1
            return False
        row += 1
    return True
def sort_by_symmetry(self, order, type, h):
    # order: h - l
    # type: b - m
    # h: 2-3
    if type == 'b':
        nType = 0
    else:
        nType = 1
    if h == '2':
        nH = 0
    else:
        nH = 1
    symmetries = read_csv_file()
    measure = symmetries[nType][nH]
    the_high = reverseSortList(measure)
    if order == 'h':
        return the_high
    else:
        return specialSortList(the_high)
def valid_gps_file(gps_file, max_interval=None):
    """
    Check if the content of the given gps file is valid.

    Parameters
    ----------
    gps_file : str
        The path of the gps file
    max_interval : int, default=None
        The maximum average sample interval, in seconds

    Returns
    -------
    True if the content of the file is valid; False, otherwise.
    """
    if not os.path.isfile(gps_file):
        return False
    try:
        time_speed = utils.read_csv_file(gps_file, columns=[1, 4])  # time, speed
        if len(time_speed) == 0:
            return False
        if max_interval:
            ave_time = (time_speed[-1][0] - time_speed[0][0]) / 1000.0 / len(time_speed)
            if ave_time > max_interval:
                print("average interval of GPS samples: %.2f seconds, which is too large." % ave_time)
                return False
    except Exception:
        return False
    return True
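# A minimal usage sketch (hypothetical paths and threshold, not from the
# original): keep only trips whose GPS log exists, is non-empty, and is
# sampled at least once every 5 seconds on average.
trips = ["/data/trips/trip_001", "/data/trips/trip_002"]
good_trips = [t for t in trips
              if valid_gps_file(t + "/gps.csv", max_interval=5)]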
import csv
import json
from typing import List, Dict, Callable
import difflib
from operator import itemgetter
import utils
import re
from location import Location

bols = utils.read_csv_file('spreadsheet_data/da-base-OLTP - BillOfLading.csv')
addresses = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Address.csv')
commodities = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - Address.csv')
containers = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Address.csv')
container_models = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - Address.csv')
ports = utils.read_csv_file('spreadsheet_data/da-base-OLTP - Port.csv')
business_entities = utils.read_csv_file(
    'spreadsheet_data/da-base-OLTP - BusinessEntity.csv')
# countries = utils.read_csv_file('data/ISO 3166 (countries).csv')
# cities = utils.read_csv_file('data/worldcities.csv')
# country_pop = utils.read_csv_file('country-pop-cum.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
def _get_rebalance_event_report_data(self):
    return utils.read_csv_file(
        utils.open_downloaded_file(
            self,
            "Rebalance Event {0}.csv".format(datetime.now().strftime("%d%m%y")),
            "rt"))
def scatter_plot(scatter, name, ylabel, title, xlabel):
    """Makes a plot with the given parameters"""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.scatter(scatter[0], scatter[1], s=0.5)
    plt.plot([0, 1], [0, 1], c='orange', lw=2)
    plt.savefig(RESULTS_DIR + '/' + EXPERIMENT_LABEL + name, dpi=300)
    plt.close()


if __name__ == "__main__":
    # Reads data from shcu_typical_data.csv, takes a sample of the times, and gets data for plotting
    typical_arscl_dataframe = read_csv_file(TYPICAL_DATA_CSV)  # Contains both ARSCL and TSI Data
    typical_arscl_dataframe = typical_arscl_dataframe.dropna(
        subset=['fsc_z', 'cf_tot', 'timestamp_utc'])
    typical_times = load_pickled_file(TYPICAL_VALID_FILE)
    print('after unpickling')
    # print(typical_times)
    typical_times = typical_times[0:N_SAMPLES]
    print('after 0:N_SAMPLES')
    # print(typical_times)
    typical_arscl_dataframe = typical_arscl_dataframe[
        typical_arscl_dataframe['timestamp_utc'].isin(typical_times)]
    typical_arscl_tsi = extract_arscl_and_image_fsc_from_dataframes(
        typical_arscl_dataframe, typical_arscl_dataframe)
    # Reads data from shcu_dubious_data.csv, takes a sample of the times, and gets data for plotting
import csv
from decimal import Decimal, InvalidOperation
from operator import itemgetter
import utils

cities = utils.read_csv_file('../data/worldcities.csv')


def decimal_or_zero(x):
    try:
        return Decimal(x)
    except InvalidOperation:
        if x == '':
            return Decimal('0')
        raise


total_pop = sum(map(decimal_or_zero, map(itemgetter('population'), cities)))
country_pop = {}
with open('country-pop-cum.csv', 'w', encoding='utf-8', newline='') as f:
    keys = ['alpha-2', 'percent', 'cumulative']
    writer = csv.DictWriter(f, keys)
    writer.writeheader()
    for c in cities:
        percent = decimal_or_zero(c['population']) / total_pop
        try:
            country_pop[c['iso2']] += percent
        except KeyError:
def gen_new_map(in_file_name, new_header, out_file_name):
    table_data, table_header = utils.read_csv_file(in_file_name)
    new_data = update_map_csv(table_data, table_header, new_header)
    utils.write_csv_file(out_file_name, new_data, new_header)
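# A minimal usage sketch (hypothetical file names and header, not from the
# original): rewrite an existing map CSV so its columns follow a new header.
new_header = ['id', 'latitude', 'longitude', 'label']
gen_new_map('old_map.csv', new_header, 'new_map.csv')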
embedding_size = 512
vocab_size = 9955
img_dir = '2.jpg'
vocab_dir = 'vocabulary.csv'
res_v1_101_lstm_parameters_dir = 'res_v1_101_lstm.ckpt'

# RGB mean value for images
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Preprocess the data, move to zero mean.
# Keep the placeholder in its own variable so the raw uint8 image is what gets
# fed and the mean subtraction actually runs; the original rebound `image` and
# fed the preprocessed tensor instead.
image_placeholder = tf.placeholder(tf.uint8, [None, None, None, 3])
image = tf.cast(image_placeholder, tf.float32)
image = image - [_R_MEAN, _G_MEAN, _B_MEAN]
input_seqs = tf.constant([1], dtype=tf.int64)
vocab = utils.read_csv_file(vocab_dir)

# Build model and predict output sequences
output_words = model.res_v1_101_lstm(image, input_seqs, None, 1, embedding_size,
                                     vocab_size, False, 0.5)

# Initialization
local_vars_init_op = tf.local_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(local_vars_init_op)
    saver.restore(sess, res_v1_101_lstm_parameters_dir)
    img_ori = cv2.imread(img_dir)
    img = cv2.resize(img_ori, (512, 512))
    sentence = sess.run(output_words, feed_dict={image_placeholder: [img]})
def read_csv_file(file):
    log.debug("reading file {}".format(file))
    return utils.read_csv_file(file)
import pytest

from all_api.topics import Topics
from utils import read_csv_file

index_page_Test_Data = read_csv_file('testdata/data.csv')
post_topic_data = read_csv_file('testdata/post_topic.csv')


@pytest.fixture
def get_topic_id():
    """
    A pytest fixture that can be reused by tests as a basic setup step.
    """
    url = "/topics"
    t = Topics(url)
    r = t.post_create_topic("3333a0fb-6dd8-439e-813b-2c3a5213a154",
                            "11111111111", "ask", "xxxxxxxxxxxxx")
    print(r.json())
    res = r.json()
    topic_id = res['topic_id']
    return topic_id


def test_get_topic_detail(get_topic_id):
    print("get_topic_id===", get_topic_id)
    url = '/topic/' + get_topic_id
    t = Topics(url)
    r = t.get_topic_detail()
    print(r.json())
from utils import read_csv_file
from frogsql import writetable

data = read_csv_file('data/country-and-continent-codes-list-csv_csv.csv')
for row in data:
    try:
        row['Country_Number'] = int(row['Country_Number'])
    except ValueError:
        row['Country_Number'] = None

writetable(data, 'CountryAndContinentCodes',
           'sql_scripts/insert_country_and_continent_codes.sql')
    def find_by_name(self, name):
        return self.find({"name": name})

    def count_by_ids(self, id_list):
        ids = [ObjectId(id) for id in id_list]
        return self.count({"_id": {"$in": ids}})


if __name__ == "__main__":
    import traceback
    import utils
    from config.application import app_context
    from db.mongo.models import NluCategory

    try:
        app_context.init_mongo_client()
        dao = NluCategoryDao(app_context.mongo_client)
        data_list = utils.read_csv_file(
            "/Users/admin/Documents/workspace/eyesmedia-corpus-server/data/nlu",
            "category.csv")
        for data in data_list:
            model = NluCategory()
            model.code = data[0]
            model.name = data[1]
            dao.save_one(model.to_dict())
        # dao = SentenceDao(app_context.mongo_client)
        # logger.info(dao.find_by_ids(["5b767a38efaa657a7929fa95"]))
    except:
        logger.error(traceback.format_exc())
    add_conv2d(model, 256, True)
    add_conv2d(model, 512, True)
    # model.add(... finish defining the rest of your model architecture here ...)
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.25))
    model.add(layers.Dense(32, activation="relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Dense(1))
    model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=0.001))
    model.summary()
    return model


if __name__ == "__main__":
    samples = utils.read_csv_file('data/driving_log.csv')
    samples += utils.read_csv_file("/opt/test/drive1/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive2/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive3/driving_log.csv")
    samples += utils.read_csv_file("/opt/test/drive4/driving_log.csv")
    samples = samples[1:]  # remove header line

    from sklearn.model_selection import train_test_split
    train_samples, validation_samples = train_test_split(samples, test_size=0.1)

    # for i in generator(train_samples):
    #     print(i[0].shape)
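# A minimal sketch of the batch generator the commented-out loop above assumes
# (an assumption, not the original: rows are taken to follow the usual
# driving_log.csv layout of center,left,right,steering,... with the image path
# in column 0 and the steering angle in column 3).
import cv2
import numpy as np
from sklearn.utils import shuffle


def generator(samples, batch_size=32):
    while True:  # loop forever so Keras can keep drawing batches
        samples = shuffle(samples)
        for offset in range(0, len(samples), batch_size):
            batch = samples[offset:offset + batch_size]
            images, angles = [], []
            for row in batch:
                images.append(cv2.imread(row[0]))  # center camera image
                angles.append(float(row[3]))       # steering angle
            yield np.array(images), np.array(angles)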
import csv
import json
import random
import numpy as np
from typing import List, Dict, Callable, TypeVar, Sequence

import utils

addresses = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
commodities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
containers = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
container_models = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - Address.csv')
ports = utils.read_csv_file('../spreadsheet_data/da-base-OLTP - Port.csv')
business_entities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - BusinessEntity.csv')
countries = utils.read_csv_file('../data/ISO 3166 (countries).csv')
cities = utils.read_csv_file('../data/worldcities.csv')
country_pop = utils.read_csv_file('country-pop-cum.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
            dct[key] = callable(dct[key])
def pre_query3(session, filepath):
    # Raw column formatter
    data = read_csv_file(filepath)
    prep = PreparerQuery3()
    valueToInsert = prep.transform(data)
    insert(query_cql.INSERT_TABLE_QUERY3, valueToInsert, session)
def get_num(s: str) -> str:
    m = re.match('[0-9]+', s)
    if m is not None:
        return m[0]
    return ''


def get_zip_code(s: str) -> str:
    m = re.match('[A-Z]*[ -][0-9]+(-[0-9]*)?', s)
    if m is not None:
        return m[0]
    return ''


raw_ports = utils.read_csv_file('spreadsheet_data/raw port data - Sheet1.csv')

with open('output.csv', 'w', encoding='utf-8', newline='') as f:
    keys = list(raw_ports[0].keys()) + ['ZIP Code']
    writer = csv.DictWriter(f, keys)
    writer.writeheader()
    for rp in raw_ports:
        s: str = rp['Address']
        if s != '':
            address = decode_list(s)
            # x = []
            # for i, part in enumerate(address):
def _create_variation_dict(attribute_variations):
    attribute_variations_dict = {}
    for attribute_variation in attribute_variations:
        attribute_variations_dict[attribute_variation] = []
    return attribute_variations_dict


def save_json_to_file(id3_tree, file_path):
    with open(file_path, 'w') as id3_file_path:
        id3_file_path.write(json.dumps(id3_tree))


if __name__ == '__main__':
    training = len(sys.argv) > 1 and sys.argv[1].lower() == 'true'
    headers = read_csv_file('playtennis_headers.txt')[0]
    play_tennis_file_data = read_csv_file('play_tennis.txt')
    if training:
        total_entropy_play_tennis = calculate_total_entropy(
            play_tennis_file_data, JOGA_TENIS, NAO_JOGA_TENIS, DECISION_INDEX)
        id3_tree = train_decision_tree(
            headers, play_tennis_file_data, total_entropy_play_tennis)
        save_json_to_file(id3_tree, 'id3_tree.json')
    else:
        pass
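# A hypothetical counterpart to save_json_to_file (not in the original), so the
# non-training branch could reload the trained tree before classifying:
def load_json_from_file(file_path):
    with open(file_path, 'r') as id3_file:
        return json.load(id3_file)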
import json
import random
from typing import List, Dict, Callable

import utils

business_entities = utils.read_csv_file(
    '../spreadsheet_data/da-base-OLTP - BusinessEntity.csv')


def convert(list_of_dicts: List[Dict], keys: List[str], callable: Callable):
    for key in keys:
        for dct in list_of_dicts:
            dct[key] = callable(dct[key])


convert(business_entities, ['Business Entity Key'], int)

with open('output.txt', 'w', encoding='utf-8') as f:
    for entity in business_entities:
        if entity['Business Entity Key'] <= 3000:  # human
            roles = list(
                set(
                    random.choices(['Consignor', 'Consignee'], [3, 5],
                                   k=random.randint(1, 2))))
        elif entity['Business Entity Key'] > 3000:  # business
            roles = list(
                set(
                    random.choices([
def __init__(self, filedir: str, filename: str, original):
    full_file_name = os.path.join(filedir, filename)
    self.data = read_csv_file(full_file_name).to_numpy().ravel()
    self.row_number = original.data.shape[0]
    self._validate_label_data()
    '''
    fields_values['plot_type'] = combo.get()
    print('Combobox atualizado para: ', combo.get())


if __name__ == '__main__':
    # Startup message in the log
    print('Ferramenta para Analise de Datasets')
    # Create the results folder if it does not already exist
    if not datavis.verify_results_folder():
        print('Erro na criacao da pasta. Encerrar.')
    # Ask for the dataframe and read it
    df, success = utils.read_csv_file()
    if not success:
        print(utils.close_program('user'))
        messagebox.showwarning('Erro de importação', utils.close_program('user'))
    else:
        # Data structures with basic information
        plots = [
            'barra', 'histograma', 'linha', 'dispersao', 'boxplot', 'violin'
        ]
        fields_values = dict([('plot_type', ''), ('target_column_x', ''),
                              ('x_name', ''), ('target_column_y', ''),
                              ('y_name', ''), ('title', ''),
                              ('hue_column', ''), ('hue_name', '')])