Example #1
    def OpenFileDialog(self):
        self.remove_columnNames_from_comboBox(self.cmbx_x_axis_data)
        self.remove_columnNames_from_comboBox(self.cmbx_y_axis_data)
        open_file_dialog = QFileDialog.getOpenFileName(self, 'Open file', "", "CSV files (*.csv)")
        if open_file_dialog == ('', ''):
            # dialog was cancelled; leave the previously loaded file (if any) untouched
            print('>>Info: No file was selected')
            return

        self.fname = open_file_dialog[0]
        print(f'>>Info: Selected file: {self.fname}')
        self.file_size = str(os.path.getsize(self.fname) / 1e6)  # size in MB, as a string
        self.dataframe = read_data(self.fname, pd.DataFrame()).data()
        self.file_rows, self.file_cols = self.dataframe.shape
        model = pandasModel(self.dataframe)
        self.table_data.setModel(model)

        self.textBox_filePath.setText(self.fname)
        self.textBox_fileSize.setText(self.file_size)

        self.add_columnNames_to_comboBox(self.cmbx_x_axis_data, self.dataframe)
        self.add_columnNames_to_comboBox(self.cmbx_y_axis_data, self.dataframe)

        self.textBox_fileColumns.setText(str(self.file_cols))
        self.textBox_fileRows.setText(str(self.file_rows))
Example #2
def get_single_image(infile, nth_image):

    # read in the aps file, it comes in as shape(512, 620, 16)
    img = read_data(infile)

    # transpose so that the slice is the first dimension shape(16, 620, 512)
    img = img.transpose()

    return np.flipud(img[nth_image])
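
A short usage sketch (not from the original source), assuming the same matplotlib/COLORMAP context as Example #6; the .aps path is a placeholder.

img_slice = get_single_image('scan.aps', 0)  # first of the 16 slices, flipped upright
plt.imshow(img_slice, cmap=COLORMAP)
plt.show()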
Example #3
def main():
    dates, datas, indexs, indexs_inv = rf.read_data(StatObj.data_path(), 0.5)
    fp_label, y_name = rf.read_label(StatObj.label_path())
    for name in y_name:
        id_value = indexs[name]
        plt.plot(datas[:, id_value], label=name)  # one curve per target column
    plt.legend()
    plt.show()
Example #4
def test():
    if os.path.exists("../Data/datas.npz") and os.path.exists(
            "../Data/datas.pkl"):
        # load cached data
        with np.load('../Data/datas.npz') as obj:
            datas = obj['datas']
            dates = obj['dates']

            with open('../Data/datas.pkl', 'rb') as infile:
                indexs = pickle.load(infile)
                labels = pickle.load(infile)
                y_name = pickle.load(infile)

    else:
        # load raw data
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs = rf.read_data(data_path, y_name, 0)

        # save data for later runs
        np.savez('../Data/datas.npz', datas=datas, dates=dates)
        with open('../Data/datas.pkl', 'wb') as outfile:
            pickle.dump(indexs, outfile)
            pickle.dump(labels, outfile)
            pickle.dump(y_name, outfile)

    print("load %d data" % datas.shape[0])

    # separate features (x_train) from prediction targets (y_train)
    y_index = [indexs[i] for i in y_name]
    y_data = datas[:, y_index]
    x_data = np.delete(datas, y_index, axis=1)
    print("val  : %d label  " % y_data.shape[1])
    print("\n----------------------------------------------")
    print("train: %d feature" % x_data.shape[1])

    # separate train and validation datasets
    print("\nseparate data...\n")
    x_train, y_train, x_val, y_val = seperate_dataset(x_data, y_data, 0.8)
    print("train: %d cases" % x_train.shape[0])
    print("val  : %d cases" % x_val.shape[0])
    print("\n----------------------------------------------")
    k = 20
    feature_eng = SelectKBest(mutual_info_regression, k=k)
    x_train_new = feature_eng.fit_transform(x_train, y_train[:, 0])
    x_val_new = feature_eng.transform(x_val)
    print("keep %d feature" % k)
    print("\n----------------------------------------------")
    feat_selected = feature_eng.get_support(indices=True)
    print("-----------")
    for idx in feat_selected:
        print(indexs.inv[idx])
    print("\n----------------------------------------------")
    print("train model...\n")
    defs = labels.set_index('Unnamed: 0')['def'].to_dict()  # feature index -> description
    for i in feat_selected:
        print(defs[i])
Example #5
def write():
    dates, datas, indexs = rf.read_data(StatObj.data_path(), 0.5)
    fp_label, y_name = rf.read_label(StatObj.label_path())
    with open("small_var.txt", 'w') as target:
        for name in y_name:
            id_value = indexs[name]
            line = str(datas[:, id_value])
            target.write(line[1:-1])  # strip the surrounding brackets
            target.write('\n')
Example #6
def plot_image_set(infile):

    # read in the aps file, it comes in as shape (512, 620, 16)
    img = read_data(infile)

    # transpose so that the slice is the first dimension, shape (16, 620, 512)
    img = img.transpose()

    # show the 16 slices in a 4x4 grid
    fig, axarr = plt.subplots(nrows=4, ncols=4, figsize=(10, 10))

    i = 0
    for row in range(4):
        for col in range(4):
            axarr[row, col].imshow(np.flipud(img[i]), cmap=COLORMAP)
            i += 1

    plt.show()
    print('Done!')
Example #7
def read_fast():
    if os.path.exists("../Data/datas.npy"):
        # load cached data
        datas = np.load('../Data/datas.npy')
        dates = np.load('../Data/dates.npy')
        with open('../Data/indexs.pkl', 'rb') as infile:
            indexs = pickle.load(infile)
        with open('../Data/indexs_inv.pkl', 'rb') as infile:
            indexs_inv = pickle.load(infile)
        with open('../Data/labels.pkl', 'rb') as infile:
            labels = pickle.load(infile)
        with open('../Data/y_name.pkl', 'rb') as infile:
            y_name = pickle.load(infile)
    else:
        # load raw data
        labels, y_name = rf.read_label(label_path)
        dates, datas, indexs, indexs_inv = rf.read_data(data_path, y_name, 0)
        # cache for subsequent runs
        np.save('../Data/datas.npy', datas)
        np.save('../Data/dates.npy', dates)
        with open('../Data/indexs.pkl', 'wb') as outfile:
            pickle.dump(indexs, outfile)
        with open('../Data/indexs_inv.pkl', 'wb') as outfile:
            pickle.dump(indexs_inv, outfile)
        with open('../Data/labels.pkl', 'wb') as outfile:
            pickle.dump(labels, outfile)
        with open('../Data/y_name.pkl', 'wb') as outfile:
            pickle.dump(y_name, outfile)
    return dates, datas, indexs, indexs_inv
Example #8
from sklearn.linear_model import Ridge
import read_file as rf
import numpy as np

data_path = "../Data_M.csv"
label_path = "../Dico_M.csv"

labels, y_name = rf.read_label(label_path)
dates, datas, indexs = rf.read_data(data_path, 0.3)

# separate features (x_train) from prediction targets (y_train)
y_index = [indexs[i] for i in y_name]
y_train = datas[:, y_index]
x_train = np.delete(datas, y_index, axis=1)

clf = Ridge(alpha=1.0)
# regress the first target column two steps ahead of the features
clf.fit(x_train[:-2, :], y_train[2:, 0])
print(clf.score(x_train[:-2, :], y_train[2:, 0]))
print(clf.coef_)
Example #9
    classes.append((a, high + 1))

    return classes
    
def calculate_classes_count(classes, numbers):
    counts = [0] * len(classes)

    for number in numbers:
        for i, class_range in enumerate(classes):
            if class_range[0] <= number < class_range[1]:
                counts[i] += 1
                break

    return counts

def create_table(classes, counts):
    print('\tclasses\t\tcount')
    for i, class_range in enumerate(classes):
        print(str(class_range[0]) + '\t-\t' + str(class_range[1]) + '\t' + str(counts[i]))
        

if __name__ == '__main__':
    data = read_file.read_data('test.txt')
    try:
        n = float(input('Enter class size : '))
        classes = create_classes(data, n)
        counts = calculate_classes_count(classes, data)
        create_table(classes, counts)
    except ValueError:
        print('Invalid number')
    
Example #10
    print "0"
    # load datas
    with np.load('../Data/datas.npz') as obj:
        datas = obj['datas']
        dates = obj['dates']

    with open('../Data/datas.pkl', 'rb') as infile:
        indexs = pickle.load(infile)
        labels = pickle.load(infile)
        y_name = pickle.load(infile)

else:
    print "1"
    # load datas
    labels, y_name = rf.read_label(label_path)
    dates, datas, indexs = rf.read_data(data_path, y_name, 0)

    # save datas
    np.savez('../Data/datas.npz', datas=datas, dates=dates)
    with open('../Data/datas.pkl', 'wb') as outfile:
        pickle.dump(indexs, outfile)
        pickle.dump(labels, outfile)
        pickle.dump(y_name, outfile)

with open('../Data/kmeans.pkl', 'rb') as infile:
    tf_class = pickle.load(infile)

print("load %d data" % datas.shape[0])

# separate features (x_train) from prediction targets (y_train)
y_index = [indexs[i] for i in y_name]
Example #11
    scores6 = cross_validation.cross_val_score(model6,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Tree-Classifier with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores6)))))

    model7 = DecisionTreeClassifier()
    scores7 = cross_validation.cross_val_score(model7,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("Decision-Tree-Classifier with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores7)))))

    model8 = MLPClassifier(solver='adam',
                           alpha=0.01,
                           hidden_layer_sizes=(10, 10))
    scores8 = cross_validation.cross_val_score(model8,
                                               X,
                                               y,
                                               cv=5,
                                               scoring='accuracy')
    print("MLP classifier's with 5 Cross validation Accuracy:",
          (np.mean(np.sqrt(abs(scores8)))))


X, y = read_data("data/final_dataset.csv")
np.random.seed(0)
Cross_Validation(X, y)
Example #12
def data_generators(data_generator: str,
                    batch_repeat: int = 10,
                    batch_sleep: float = 0.5,
                    timezone: str = 'utc',
                    enable_timezone_range: bool = True,
                    token: str = None,
                    tag: str = None,
                    initial_configs: bool = False,
                    data_dir: str = os.path.join(ROOT_PATH, 'data'),
                    compress: bool = False,
                    exception: bool = False) -> dict:
    """
    Based on the parameters generate a data set
    :args:
        data_generator:str - which data set to generated
        batch_repeat:int - number of rows per batch
        batch_sleep:float - sleep time between rows or a specific batch
        timezone:str - whether to set the timezone in UTC or local
        enable_timezone_range:bool - whether or not to set timestamp within a "range"
        token:str - linode token
        tag:str - group of linode nodes to get data from. If not gets from all nodes associated to token
        initial_configs:bool - whether this is the first time the configs are being deployed
        data_dir:str - for data_generator type file directory containing data to read
        compress:bool - whether the content in data_dir is compressed
        exception:bool - whether or not to print error message(s)
    :params:
        payloads:dict - generated data
    :reeturn:
        payloads
    """
    payloads = {}  # returned empty if data_generator does not match a known option
    if data_generator == 'linode':
        import linode
        payloads = linode.get_linode_data(
            token=token,
            tag=tag,
            initial_configs=initial_configs,
            timezone=timezone,
            enable_timezone_range=enable_timezone_range,
            exception=exception)
    elif data_generator == 'percentagecpu':
        import percentagecpu_sensor
        payloads = percentagecpu_sensor.get_percentagecpu_data(
            timezone=timezone,
            enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep,
            repeat=batch_repeat)
    elif data_generator == 'ping':
        import ping_sensor
        payloads = ping_sensor.get_ping_data(
            timezone=timezone,
            enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep,
            repeat=batch_repeat)
    elif data_generator == 'power':
        import power_company
        payloads = power_company.data_generator(
            timezone=timezone,
            enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep,
            repeat=batch_repeat)
    elif data_generator == 'synchrophasor':
        import power_company_synchrophasor
        payloads = power_company_synchrophasor.data_generator(
            timezone=timezone,
            enable_timezone_range=enable_timezone_range,
            sleep=batch_sleep,
            repeat=batch_repeat)
    elif data_generator == 'trig':
        import trig
        payloads = trig.trig_value(timezone=timezone,
                                   enable_timezone_range=enable_timezone_range,
                                   sleep=batch_sleep,
                                   repeat=batch_repeat)
    elif data_generator == 'aiops':
        import customer_aiops
        payloads = customer_aiops.get_aiops_data(timezone=timezone,
                                                 sleep=batch_sleep,
                                                 repeat=batch_repeat)

    elif data_generator == 'file':
        import read_file
        payloads = read_file.read_data(dir_path=data_dir,
                                       compress=compress,
                                       exception=exception)

    return payloads
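
A minimal call sketch (not from the original source): the generator name 'ping' and the argument names come straight from the signature and docstring above; the argument values and the print are illustrative only.

payloads = data_generators(data_generator='ping',
                           batch_repeat=5,
                           batch_sleep=0.1,
                           timezone='utc')
print(payloads)  # dict of generated ping-sensor rows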