示例#1
0
def oversamp(max_length=193, label_index=0, random_state=10):
    """Pad the variable-length samples and randomly oversample one label column.

    The samples come from ``dataing.data_make()``. Every sequence is padded at
    the end with ``[0.0, 0.0]`` up to ``max_length`` steps, flattened to one
    row so the sampler can work on 2-D input, oversampled, and finally
    reshaped back to ``(max_length, 2)`` per sample.

    Args:
        max_length: Number of time steps every sequence is padded to
            (``maxlen`` of ``pad_sequences``). The same value is used when
            restoring the per-sample shape, so the two can no longer drift.
        label_index: Column of the label matrix used as the single target.
        random_state: Seed forwarded to ``RandomOverSampler``.

    Returns:
        A ``(result, y_resampled)`` pair: ``result`` is a list of samples,
        each a list of ``max_length`` two-element lists of floats;
        ``y_resampled`` is whatever ``fit_resample`` returns for the labels.

    Class balance per label column, taken from the original author's notes:
        0: {1: 330, 0: 187}   1: {1: 393, 0: 124}   2: {1: 347, 0: 170}
        3: {0: 430, 1: 87}    4: {1: 316, 0: 201}   5: {0: 482, 1: 35}
        6: {0: 444, 1: 73}    7: {0: 432, 1: 85}    8: {0: 422, 1: 95}
        9: {0: 502, 1: 15}
    """
    # Each time step is a pair of values; this must match the padding value below.
    n_features = 2

    x, y = dataing.data_make()
    # The sequences have different lengths, so the array must hold objects.
    x_set = np.array(x, dtype=object)
    # NOTE(review): the original comments report y_set.shape == (516, 10) — confirm.
    y_set = np.array(y)

    padded = keras.preprocessing.sequence.pad_sequences(
        x_set,
        maxlen=max_length,
        dtype='float64',
        padding='post',
        value=[0.0] * n_features)

    # Flatten every (max_length, n_features) sample into one row (row-major),
    # because the sampler is fed one flat feature vector per sample.
    flat_rows = padded.reshape(-1, max_length * n_features).tolist()

    # Single-label setting: keep only the requested label column.
    labels = y_set[:, label_index].tolist()
    print(Counter(labels))

    # Imported here, as in the original, so imblearn is only needed when this runs.
    from imblearn.over_sampling import RandomOverSampler
    ros = RandomOverSampler(random_state=random_state)
    x_resampled, y_resampled = ros.fit_resample(flat_rows, labels)
    print(sorted(Counter(y_resampled).items()))

    # Undo the flattening: back to (max_length, n_features) per sample.
    result = np.asarray(x_resampled, dtype='float64').reshape(
        -1, max_length, n_features).tolist()
    return result, y_resampled
示例#2
0
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense
import numpy as np
import pandas as pd
import random
from sklearn import metrics
import tensorflow.keras.backend as K
#在自己的库函数
import dataing  ##读取数据
import data_same_length  ##处理成定长数据
import loss_function
from tensorflow import keras

#1.读入数据,打乱顺序
# Load the samples and pair every feature sequence with its label.
x, y = dataing.data_make()
train_set = [[x[idx], y[idx]] for idx in range(len(x))]

# Shuffle the pairs in place so each feature sequence stays with its label.
random.shuffle(train_set)
x_set = [pair[0] for pair in train_set]  # features
y_set = [pair[1] for pair in train_set]  # labels

# Maximum training-sequence length; per the original note it can be looked up
# in the dataing program and is currently 276.
max_length = 276

##keras.preprocessing.sequence.pad_sequences将多个序列截断或补齐为相同长度,返回numpy数组
X = keras.preprocessing.sequence.pad_sequences(x_set,
                                               maxlen=max_length,
                                               dtype='float64',
                                               padding='post',
import pandas as pd
import random
from sklearn import metrics
import tensorflow.keras.backend as K

#在自己的库函数
import dataing  ##读取数据
import data_same_length  ##处理成定长数据
import loss_function

##平衡数据:0第一类{1: 330, 0: 187},1第二类{1: 393, 0: 124},2第三类{1: 347, 0: 170}
########: 3第四类{0: 430, 1: 87},4第五类{1: 316, 0: 201},5第六类{0: 482, 1: 35}
########: 6第七类{0: 444, 1: 73},7第八类{0: 432, 1: 85},8第九类{0: 422, 1: 95},9第十类{0: 502, 1: 15}

#读取数据
# Load the variable-length features and their labels.
# The author's note names dataing.data_make_samelen() as the fixed-length alternative.
x, y = dataing.data_make()

train_set = []
for idx, sample in enumerate(x):
    # Shuffle the elements inside this sample in place (x itself is mutated).
    random.shuffle(sample)
    train_set.append([sample, y[idx]])

# Shuffle the sample order; each feature/label pair stays together.
random.shuffle(train_set)
x_set1 = [pair[0] for pair in train_set]  # features
y_set1 = [pair[1] for pair in train_set]  # labels
'''x1 = x.copy()
y1 = np.array(y)[:,0].tolist()
print(len(x))
示例#4
0
# -*- coding: utf-8 -*-
#@Author  : lynch

# 导入本项目所需要的包
import dataing  ##读取数据
import data_same_length  ##处理成定长数据

import tensorflow as tf
from tensorflow.keras.layers import Dense

import numpy as np
import pandas as pd
import random

#加载数据
# Per the original note, data_make() reads 'training_set.csv' and
# 'training_label.csv'; the sequences are variable-length.
x, y = dataing.data_make()

# Pair every feature sequence with its label so both are shuffled together.
train_set = [[x[idx], y[idx]] for idx in range(len(x))]
random.shuffle(train_set)
x_1 = [pair[0] for pair in train_set]  # features
y_1 = [pair[1] for pair in train_set]  # labels
'''#数据长度归一化
length_x = []
for i in x_set:
    length_x.append(len(i))
max_length = max(length_x)
print('训练数据最大长度是:',max_length)
x_set = data_same_length.same_length(x_set,max_length)
print(x_set[0])
print(len(x_set[0]))'''