示例#1
0
def plotDaily(
        filename    = None,
        US          = True,
        places      = util.TEST_STATES,
        cases       = True,
        day         = util.TEST_DATE,
        dark_mode   = True
    ):
    """Save a bar chart of the daily count of each place on a single day.

    The frame returned by ``util.loadData`` is summed per place, the
    cumulative date columns (from ``util.START_DATE`` onwards) are turned
    into day-over-day differences, and the value in column ``day`` is
    plotted for every entry of ``places``.

    Args:
        filename: Output image path. When falsy, a name is built from the
            label and ``day`` (with ``/`` replaced by ``-``).
        US: Group by ``'Province_State'`` when true, else by
            ``'Country/Region'``.
        places: Values of the grouping column to plot, one bar each.
        cases: Forwarded to ``util.loadData``; also selects the label
            ``'Cases'`` (true) or ``'Deaths'`` (false).
        day: Label of the date column to plot.
        dark_mode: Apply matplotlib's ``'dark_background'`` style.

    Raises:
        IndexError: If a place has no row in the grouped frame.
        ValueError: If the selected value is NaN, which is what ``diff``
            yields for the first date column.
    """
    column = 'Province_State' if US else 'Country/Region'
    df = util.loadData(US=US, cases=cases).groupby(column).sum().reset_index()

    if dark_mode:
        plt.style.use('dark_background')

    colors = plt.cm.Reds(np.linspace(0.35, 0.65, len(places)))

    # Row filtering does not change the columns, so look this up once.
    start_column = df.columns.get_loc(util.START_DATE)

    values = []
    for place in places:
        cumulative_data = df[df[column] == place]
        # convert total counts to daily counts
        counts = cumulative_data.iloc[:, start_column:].diff(axis=1)
        # counts[day] is a one-row Series; calling int() on the Series itself
        # is deprecated in pandas, so extract the scalar first.
        values.append(int(counts[day].iloc[0]))

    plt.bar(places, values, color=colors)

    label = 'Cases' if cases else 'Deaths'
    plt.title(f'{label}, {day}')
    plt.ylabel(f'{label}')

    filename = filename if filename else f"{label}_{day.replace('/', '-')}.png"
    plt.savefig(filename)
    plt.close()
示例#2
0
 def __init__(self):
     """Reset the score trackers and cache the held-out split of the data.

     The last 20% of ``data/train_pre.json`` is kept as the evaluation set:
     ``test_x`` holds the texts and ``test_y`` holds, per sample, the
     ``(mention, offset)`` pairs whose ``kb_id`` is not ``'NIL'``.
     """
     self.F1 = []
     self.best = 0.
     self.best_ment = 0.

     data = loadData(path + '/' + "data/train_pre.json")
     held_out = data[int(len(data) * 0.8):]

     texts = []
     gold_mentions = []
     for sample in held_out:
         texts.append(sample['text'])
         gold_mentions.append([
             (entry['mention'], entry['offset'])
             for entry in sample['mention_data']
             if entry['kb_id'] != 'NIL'
         ])

     self.test_x = texts
     self.test_y = gold_mentions
示例#3
0
def trainBatch(net, criterion, optimizer):
    """Run one optimisation step on the next training batch and return its cost.

    The batch is pulled from the module-level ``train_iter``. Images, encoded
    texts and text lengths are handed to ``util.loadData`` together with the
    module-level ``image``, ``text`` and ``length`` objects (presumably to copy
    the batch into those buffers -- confirm against ``util.loadData``).

    NOTE(review): ``net`` is not used here; the forward pass and
    ``zero_grad`` go through the module-level ``crnn``. Left as is because
    callers may rely on it.

    Args:
        net: Unused (see note above).
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        optimizer: Its ``step()`` is called after the backward pass.

    Returns:
        The criterion value divided by the batch size.
    """
    # Built-in next() works on every iterator; the `.next()` method only
    # exists on Python-2-style iterators.
    data = next(train_iter)
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    util.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)

    util.loadData(text, t)
    util.loadData(length, l)

    preds = crnn(image)
    # One entry per sample, each equal to the size of preds' first dimension.
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
示例#4
0
def plotTimeSeries(
        filename    = None,
        US          = True,
        places      = util.TEST_STATES,
        cases       = True,
        num_days    = 7,
        end_date    = None,
        dark_mode   = True
    ):
    """Save a line chart of daily counts over a window of days, one line per place.

    The frame returned by ``util.loadData`` is summed per place and the
    cumulative date columns (from ``util.START_DATE`` onwards) are turned
    into day-over-day differences. The plotted window is the last
    ``num_days`` date columns, shifted back by ``getOffset(df, end_date)``
    columns when ``end_date`` is given.

    Args:
        filename: Output image path. When falsy, a name is built from the
            label and ``num_days``.
        US: Group by ``'Province_State'`` when true, else by
            ``'Country/Region'``.
        places: Values of the grouping column to plot.
        cases: Forwarded to ``util.loadData``; also selects the label
            ``'Cases'`` (true) or ``'Deaths'`` (false).
        num_days: Number of date columns in the window.
        end_date: Optional value passed to ``getOffset`` to move the end of
            the window; ``None`` keeps the window at the latest columns.
        dark_mode: Apply matplotlib's ``'dark_background'`` style.

    Raises:
        IndexError: If a place has no row in the grouped frame.
        ValueError: If the window contains a NaN, which is what ``diff``
            yields for the first date column.
    """
    column = 'Province_State' if US else 'Country/Region'
    df = util.loadData(US=US, cases=cases).groupby(column).sum().reset_index()

    if dark_mode:
        plt.style.use('dark_background')

    colors = plt.cm.Oranges(np.linspace(0.35, 0.65, len(places)))

    offset = getOffset(df, end_date) if end_date else 0

    # The date columns (and therefore the window) are the same for every
    # place, so compute them once. This also keeps x_values defined when
    # `places` is empty.
    start_column = df.columns.get_loc(util.START_DATE)
    date_columns = df.columns[start_column:]
    x_values = list(date_columns[-(num_days + offset):len(date_columns) - offset])

    for index, place in enumerate(places):
        cumulative_data = df[df[column] == place]
        # convert total counts to daily counts
        counts = cumulative_data.iloc[:, start_column:].diff(axis=1)
        # Take the single row first: calling int() on a one-element Series
        # is deprecated in pandas.
        daily = counts.iloc[0]
        y_values = [int(daily[col]) for col in x_values]

        plt.plot(x_values, y_values, label=place, color=colors[index], linewidth=2)

    label = 'Cases' if cases else 'Deaths'
    plt.title(f'Daily {label}, Last {num_days} Days')
    # control the number of date tick marks
    skip = max(num_days // 5, 1)
    plt.xticks(x_values[::skip])
    plt.xlabel('Date')
    plt.ylabel(f'{label}')
    plt.legend()
    filename = filename if filename else f'{label}_last_{num_days}.png'
    plt.savefig(filename)
    plt.close()
示例#5
0
def plotCaseMap(
        filename    = None,
        US          = True,
        day         = util.TEST_DATE,
        dark_mode   = True
    ):
    """Write a choropleth image of the daily cases on ``day``.

    The loaded frame is summed per state (``US`` true) or per country, the
    column-wise difference at ``day`` becomes the ``'Cases'`` column, and the
    result is rendered with ``plotly.express.choropleth`` and written via the
    kaleido engine.

    Args:
        filename: Output image path; defaults to ``'usa_chart.png'`` or
            ``'global_chart.png'`` depending on ``US``.
        US: Selects the grouping column, location mode, scope and title.
        day: Label of the date column to plot.
        dark_mode: Use the ``'plotly_dark'`` template when true.
    """
    raw = util.loadData(US=US)
    all_columns = list(raw.columns)
    group_key = 'Province_State' if US else 'Country/Region'
    totals = raw.groupby(group_key)[all_columns].agg('sum')

    totals['Cases'] = totals.diff(axis=1)[day]

    global_scopes = ['world', 'europe', 'africa', 'asia', 'south america', 'north america']

    if US:
        # States are addressed by abbreviation; unknown names map to None.
        totals['State'] = [util.STATE_TO_ABBREV.get(name, None) for name in totals.index]
        region_column = 'State'
        location_mode = 'USA-states'
        map_scope = 'usa'
        default_filename = 'usa_chart.png'
    else:
        totals['Country'] = totals.index
        region_column = 'Country'
        location_mode = 'country names'
        map_scope = global_scopes[0]
        default_filename = 'global_chart.png'

    template = 'plotly_dark' if dark_mode else None
    title = f"{'US' if US else 'Global'} Daily Cases, {day}"

    fig = px.choropleth(
        totals,
        locations=region_column,
        locationmode=location_mode,
        scope=map_scope,
        color='Cases',
        hover_name=region_column,
        color_continuous_scale='Peach',
        template=template,
        title=title,
        width=1000,
        range_color=[0, 3000],
    )

    if filename is None:
        filename = default_filename

    fig.update_layout(
        margin={'l': 0, 'r': 0, 't': 70, 'b': 20},
        title={'font': {'size': 20}, 'x': 0.5},
    )
    fig.write_image(filename, engine='kaleido')
def runTextModelEval(textModelName = None, PATH = '../model/doc2vec/'):
    '''
    Given a list of existing Text Model names, load them and get the baseline
    results one by one. For the baseline evaluation see
    baseline_classification.py.

    A model that cannot be loaded (the loader raises or returns None) is
    reported with 'Failed on <name>' and skipped.

    @param:
        textModelName, a list of TextModel names (None or omitted means no
            models)
        PATH, the path to the TextModel folder, default set to be
            ../model/doc2vec/
    @return: null

    '''
    # Avoid a mutable default argument; None stands for "no models".
    if textModelName is None:
        textModelName = []

    all_data, train_size, test_size, _train_x, train_y, _test_x = util.loadData()
    # Result is not used here; the call is kept in case it has side effects.
    util.data_preprocess(all_data)
    svd = TruncatedSVD(n_components=GENE_INPUT_DIM, random_state=12)

    for textModel in textModelName:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still stop the run.
        try:
            model = wel.loadTextModel(PATH + textModel)
        except Exception:
            model = None
        if model is None:
            print('Failed on ' + textModel)
            continue

        text_train_arrays, _text_test_arrays = wel.getTextVec(model, train_size, test_size, 200)
        # NOTE(review): these two calls do not depend on the text model, but
        # they stay inside the loop because `svd` is passed in and may be
        # refitted by them.
        truncated_one_hot_gene = wel.getGeneVec(all_data, svd)
        truncated_one_hot_variation = wel.getVariationVec(all_data, svd)
        train_set = np.hstack((
            truncated_one_hot_gene[:train_size],
            truncated_one_hot_variation[:train_size],
            text_train_arrays,
        ))
        encoded_y = np.array(pd.get_dummies(train_y))

        X = np.array(train_set)
        y = np.array(bc.getLabels(encoded_y))
        print('Results for TextModel: ' + textModel)
        bc.baseline(X, y)
示例#7
0
    def train(self):
        """Train for up to 20000 steps, checking accuracy every 100 steps.

        Each step feeds a 64-sample batch with ``_prob`` 0.75. Every 100th
        step a 100-sample test batch is evaluated with ``_prob`` 1, its
        summary is written, and training stops early once accuracy reaches
        1.0. The session is saved at the end with the final step counter.
        """
        # The loaded splits are not used below; the call is kept as is.
        train_x, train_y, test_x, test_y, trainIndexs, testIndexs = loadData()

        last_step = 20000
        epoch = 0
        while epoch <= last_step:
            images, texts = getTrainBatch(64)
            train_feed = {
                self._x: images,
                self._y: texts,
                self._prob: 0.75,
            }
            _, loss = self.sess.run(
                [self.train_step, self.cross_entropy], feed_dict=train_feed)
            print('epoch', epoch, '/loss', loss)

            if epoch % 100 == 0:
                eval_images, eval_texts = getTestBatch(100)
                eval_feed = {
                    self._x: eval_images,
                    self._y: eval_texts,
                    self._prob: 1,
                }
                accu = self.accuracy.eval(feed_dict=eval_feed)
                summary = self.merged_summary.eval(feed_dict=eval_feed)
                self.writer.add_summary(summary, epoch)
                print('epoch', epoch, '/accuracy', accu)
                if accu >= 1.0:
                    break

            epoch += 1

        self.saver.save(self.sess, model_path + '/', global_step=epoch)
示例#8
0
class OurTokenizer(Tokenizer):
    """Tokenizer that emits exactly one token per input character."""

    def _tokenize(self, text):
        """Map every character of ``text`` to a token.

        Characters present in the token dictionary are kept as they are,
        whitespace characters become the untrained ``[unused1]`` token, and
        everything else becomes ``[UNK]``.
        """
        return [
            ch if ch in self._token_dict
            else '[unused1]' if self._is_space(ch)
            else '[UNK]'
            for ch in text
        ]


# Module-level setup: tokenizer, data splits and knowledge-base lookups.
tokenizer = OurTokenizer(token_dict)
char_size = 512  # 768
data = loadData(path + '/' + 'data/train_pre.json')
# data=[ for line in data]
# First 80% of the samples are used for training, the remaining 20% for validation.
train_data = data[:int(len(data) * 0.8)]
valid_data = data[int(len(data) * 0.8):]
# presumably lookups keyed by alias and by subject id -- confirm against loadDataBase
dataByAlias, dataBySubjectId = loadDataBase(path + '/' + 'data/kb_data')


def seq_padding(X, padding=0):
    """Right-pad every sequence in ``X`` to the length of the longest one.

    Sequences shorter than the longest are extended with ``padding``;
    sequences already at full length are passed through unchanged. The
    padded rows are returned stacked in a single numpy array.
    """
    longest = max(len(seq) for seq in X)
    rows = []
    for seq in X:
        missing = longest - len(seq)
        if missing > 0:
            seq = np.concatenate([seq, [padding] * missing])
        rows.append(seq)
    return np.array(rows)

示例#9
0
    def extractLocalFeature(self):
        """Derive per-trial local features from the failure recordings.

        A one-component PCA is fitted on the force data of all success
        recordings and then applied to the force data of each failure
        recording. For each failure recording the loop also builds a gated
        audio-power list, the norm of (target position - end-effector
        position) and the absolute quaternion angle between end effector
        and target at every time index.

        The visible code neither returns nor stores the computed features.

        NOTE(review): ``power_min`` is read in the audio branch but is not
        assigned anywhere in this method (its only assignment is commented
        out below). Unless it is defined at module level, that branch
        raises NameError.
        """

        success_list, failure_list = util.getSubjectFileList(
            self.record_root_path, [self.subject], self.task)

        # Divide it into training and test set
        # -------------------------------------------------------------

        # -------------------------------------------------------------
        # loading and time-sync
        d = util.loadData(success_list)

        # Stack the force arrays of all success trials side by side.
        force_array = None
        for idx in xrange(len(d['timesList'])):
            if force_array is None:
                force_array = d['ftForceList'][idx]
            else:
                force_array = np.hstack([force_array, d['ftForceList'][idx]])

        from sklearn.decomposition import PCA
        pca = PCA(n_components=1)
        # Only the fitted `pca` is used below; `res` itself is not read again.
        res = pca.fit_transform(force_array.T)

        # -------------------------------------------------------------
        # loading and time-sync
        d = util.loadData(failure_list)

        # extract local features
        # r is passed to getAngularSpatialRF below.
        r = 0.25

        for idx in xrange(len(d['timesList'])):

            timeList = d['timesList'][idx]
            audioAzimuth = d['audioAzimuthList'][idx]
            audioPower = d['audioPowerList'][idx]
            kinEEPos = d['kinEEPosList'][idx]
            kinEEQuat = d['kinEEQuatList'][idx]

            # (repeats the two lookups above)
            kinEEPos = d['kinEEPosList'][idx]
            kinEEQuat = d['kinEEQuatList'][idx]

            ftForce = d['ftForceList'][idx]

            kinTargetPos = d['kinTargetPosList'][idx]
            kinTargetQuat = d['kinTargetQuatList'][idx]

            # Unimodal feature - Audio --------------------------------------------
            # Keep the audio power only while the azimuth lies strictly inside
            # (ang_min, ang_max); otherwise substitute power_min.
            unimodal_audioPower = []
            for time_idx in xrange(len(timeList)):
                ang_max, ang_min = self.getAngularSpatialRF(
                    kinEEPos[:, time_idx], r)

                if audioAzimuth[time_idx] > ang_min and audioAzimuth[
                        time_idx] < ang_max:
                    unimodal_audioPower.append(audioPower[time_idx])
                else:
                    unimodal_audioPower.append(
                        power_min)  # or append white noise?

            ## power_max   = np.amax(d['audioPowerList'])
            ## power_min   = np.amin(d['audioPowerList'])
            ## self.audio_disp(timeList, audioAzimuth, audioPower, audioPowerLocal, \
            ##                 power_min=power_min, power_max=power_max)

            # Unimodal feature - Kinematics --------------------------------------
            # Left empty in the visible code.
            unimodal_kinVel = []

            # Unimodal feature - Force -------------------------------------------
            # ftForceLocal = np.linalg.norm(ftForce, axis=0) #* np.sign(ftForce[2])
            # Project this trial's force onto the PCA component fitted on the success trials.
            unimodal_ftForce = pca.transform(ftForce.T).T
            ## self.ft_disp(timeList, ftForce, ftForceLocal)

            # Crossmodal feature - relative dist, angle --------------------------
            crossmodal_relativeDist = np.linalg.norm(kinTargetPos - kinEEPos,
                                                     axis=0)
            crossmodal_relativeAng = []
            for time_idx in xrange(len(timeList)):

                startQuat = kinEEQuat[:, time_idx]
                endQuat = kinTargetQuat[:, time_idx]

                diff_ang = qt.quat_angle(startQuat, endQuat)
                crossmodal_relativeAng.append(abs(diff_ang))
示例#10
0
@File    : test.py
@Time    : 2019/5/27 8:32
@Author  : Blue Keroro
"""
import sys
import os

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

from util import loadDataBase, loadData
import json
if __name__ == '__main__':
    # Collect every sample whose mentions do not cover exactly as many
    # characters as their lengths add up to.
    cnt = 0
    data = loadData('data/train.json')
    delete = []
    for sub in data:
        # One flag per character of the text; set to 1 once a mention covers it.
        covered = [0] * len(sub['text'])
        mention_chars = 0
        for ment in sub['mention_data']:
            start = int(ment['offset'])
            span = len(ment['mention'])
            mention_chars += span
            for pos in range(start, start + span):
                covered[pos] = 1
        # A non-zero difference means some character was claimed more than once.
        if mention_chars - covered.count(1) != 0:
            print(sub)
            delete.append(sub)
            cnt += 1
示例#11
0
def val(net, test_dataset, criterion, max_iter=100):
    """Evaluate on ``test_dataset`` and print edit-distance statistics.

    Every batch of the dataset is run through the model. The edit distance
    between the encoded prediction and the encoded target is accumulated per
    sample, a few raw/decoded predictions of the last batch are printed, and
    finally the summed edit distance and ``1 - mean_edit / mean_target_len``
    are printed.

    NOTE(review): ``net`` is only switched to eval mode; the parameters that
    get frozen and the forward pass both use the module-level ``crnn``.
    ``requires_grad`` is not restored here.

    Args:
        net: Model put into eval mode.
        test_dataset: Dataset wrapped in a ``DataLoader`` built from
            ``opt.batchSize`` / ``opt.workers``.
        criterion: Called as ``criterion(preds, text, preds_size, length)``.
        max_iter: Ignored -- it is overwritten with the number of batches,
            so the whole dataset is always evaluated.

    Raises:
        ZeroDivisionError: If no sample was evaluated, or all targets
            encode to length zero.
    """
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=False,
        batch_size=opt.batchSize,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=32, imgW=100, keep_ratio=True))
    val_iter = iter(data_loader)

    n = 0
    n_correct = 0  # despite the name, this accumulates edit distance
    n_text = 0
    loss_avg = util.averager()

    max_iter = len(data_loader)
    for _ in range(max_iter):
        # Built-in next() works on every iterator; `.next()` only exists on
        # Python-2-style iterators.
        data = next(val_iter)
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        util.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)

        util.loadData(text, t)
        util.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            # Decode byte strings to text. On Python 2 `bytes` is `str`, so
            # this matches the former "not unicode" check; on Python 3 the
            # name `unicode` does not exist.
            if isinstance(target, bytes):
                target = target.decode('utf-8')
            pred_encode, _ = converter.encode(pred)
            target_encode, _ = converter.encode(target)
            n_correct += editdistance.eval(pred_encode, target_encode)
            n_text += len(target_encode)
            n += 1

    # Show a few examples from the last evaluated batch.
    raw_preds = converter.decode(preds.data, preds_size.data,
                                 raw=True)[:opt.n_test_disp]
    for raw_pred, sim_pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        gt = gt.lower()
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, sim_pred, gt))

    len_edit = n_correct / float(n)
    len_text = n_text / float(n)
    norm = 1 - len_edit / len_text
    print('editd dist: %f, norm acc: %f' % (n_correct, norm))
示例#12
0
 def loadEvents(self, path):
     """Load the events from ``path`` and keep their concatenation as ``all_tracks``."""
     self.events = util.loadData(filename=path)
     # Single frame with a fresh 0..n-1 index across all events.
     alltracks = pandas.concat(self.events, ignore_index=True)
     self.all_tracks = alltracks
示例#13
0
@author: zz
"""
#Input: XX.X
#Random Weights: X.X
#Activation: X.X
#Linear Weights: X.X (trained in real value)
# error rate:  7.9 %  [0.1 MNIST lite]

from util import loadData, normalizeData, around
from FPGA_RF_CIW_ELM import FPGA_RF_CIW_ELM
import numpy as np

# Number of bits passed as N_bits to around() for the input data.
prec = 3
    
# presumably 0.1 selects the "0.1 MNIST lite" subset named in the header -- confirm against util.loadData
train_data, train_label, test_data, test_label =  loadData(0.1)
feature_dim = train_data.shape[1]
label_dim = train_label.shape[1]

train_data = normalizeData(train_data)#/5000#int(feature_dim*1)
test_data = normalizeData(test_data)#/5000#int(feature_dim*1)
# Replace label entries equal to 1 with 250.
train_label[train_label==1] = 250
test_label[test_label==1] = 250
# presumably rounds the inputs to `prec` bits -- confirm against util.around
train_data = around(train_data, N_bits=prec)
test_data = around(test_data, N_bits=prec)

# The hidden size passed here is ten times the input feature dimension.
fpga_elm = FPGA_RF_CIW_ELM(28, 28, feature_dim*10, label_dim, 'lite', 'rf-ciw', train_data, train_label, \
                           H=0.25, randomPrec=3, actPrec=3, linearPrec=3, callPrec=3, fixedTrain = False, fixedTest=True)

# NOTE(review): Python 2 print statements; this script does not parse on Python 3.
print "Training data max dim:", np.max(train_data)
print "Training data min dim:", np.min(train_data)
示例#14
0
	#szs = []
	#szs.append(sz)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))

        tmp = nm.split('.')[0]
        tmp2 = tmp + '.txt'
        lex_path = lex_dir + tmp2
        txt = open(lex_path).read()
        wds = txt.splitlines()
        len_lexicon = len(wds)
        lex_pred = []
        lexicon = []
        for wd in wds:
            lexicon.append(wd)
            t, l = converter.encode(wd)
            util.loadData(text, t)
            util.loadData(length, l)
            cost = criterion(preds, text, preds_size, length)
	    tmp4 = cost.data[0]
            lex_pred.append(tmp4)
        idx = lex_pred.index(min(lex_pred))
        finnal_pred = lexicon[idx]
        print('%-20s => %-20s' % (finnal_pred, lexicon[0]))

	
	_, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)

        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
示例#15
0
    def extractLocalFeature(self):
        """Derive per-trial local features from the failure recordings.

        A one-component PCA is fitted on the force data of all success
        recordings and then applied to the force data of each failure
        recording. For each failure recording the loop also builds a gated
        audio-power list, the norm of (target position - end-effector
        position) and the absolute quaternion angle between end effector
        and target at every time index.

        The visible code neither returns nor stores the computed features.

        NOTE(review): ``power_min`` is read in the audio branch but is not
        assigned anywhere in this method (its only assignment is commented
        out below). Unless it is defined at module level, that branch
        raises NameError.
        """

        success_list, failure_list = util.getSubjectFileList(self.record_root_path, [self.subject], self.task)

        # Divide it into training and test set
        # -------------------------------------------------------------
        
        # -------------------------------------------------------------
        # loading and time-sync
        d = util.loadData(success_list)

        # Stack the force arrays of all success trials side by side.
        force_array = None
        for idx in xrange(len(d['timesList'])):
            if force_array is None:
                force_array = d['ftForceList'][idx]
            else:
                force_array = np.hstack([force_array, d['ftForceList'][idx] ])

        from sklearn.decomposition import PCA
        pca = PCA(n_components=1)
        # Only the fitted `pca` is used below; `res` itself is not read again.
        res = pca.fit_transform( force_array.T )

        # -------------------------------------------------------------
        # loading and time-sync
        d = util.loadData(failure_list)

        # extract local features
        # r is passed to getAngularSpatialRF below.
        r = 0.25


        for idx in xrange(len(d['timesList'])):

            timeList     = d['timesList'][idx]
            audioAzimuth = d['audioAzimuthList'][idx]
            audioPower   = d['audioPowerList'][idx]
            kinEEPos     = d['kinEEPosList'][idx]
            kinEEQuat    = d['kinEEQuatList'][idx]
            
            # (repeats the two lookups above)
            kinEEPos     = d['kinEEPosList'][idx]
            kinEEQuat    = d['kinEEQuatList'][idx]
            
            ftForce      = d['ftForceList'][idx]

            kinTargetPos  = d['kinTargetPosList'][idx]
            kinTargetQuat = d['kinTargetQuatList'][idx]

            
            # Unimodal feature - Audio --------------------------------------------
            # Keep the audio power only while the azimuth lies strictly inside
            # (ang_min, ang_max); otherwise substitute power_min.
            unimodal_audioPower = []
            for time_idx in xrange(len(timeList)):
                ang_max, ang_min = self.getAngularSpatialRF(kinEEPos[:,time_idx], r)
                
                if audioAzimuth[time_idx] > ang_min and audioAzimuth[time_idx] < ang_max:
                    unimodal_audioPower.append(audioPower[time_idx])
                else:
                    unimodal_audioPower.append(power_min) # or append white noise?

            ## power_max   = np.amax(d['audioPowerList'])
            ## power_min   = np.amin(d['audioPowerList'])
            ## self.audio_disp(timeList, audioAzimuth, audioPower, audioPowerLocal, \
            ##                 power_min=power_min, power_max=power_max)
                    
            # Unimodal feature - Kinematics --------------------------------------
            # Left empty in the visible code.
            unimodal_kinVel = []
            
            # Unimodal feature - Force -------------------------------------------
            # ftForceLocal = np.linalg.norm(ftForce, axis=0) #* np.sign(ftForce[2])
            # Project this trial's force onto the PCA component fitted on the success trials.
            unimodal_ftForce = pca.transform(ftForce.T).T
            ## self.ft_disp(timeList, ftForce, ftForceLocal)
            
            # Crossmodal feature - relative dist, angle --------------------------
            crossmodal_relativeDist = np.linalg.norm(kinTargetPos - kinEEPos, axis=0)
            crossmodal_relativeAng = []
            for time_idx in xrange(len(timeList)):

                startQuat = kinEEQuat[:,time_idx]
                endQuat   = kinTargetQuat[:,time_idx]
                
                diff_ang = qt.quat_angle(startQuat, endQuat)
                crossmodal_relativeAng.append( abs(diff_ang) )
示例#16
0
import layer
from speakerReg import speakerReg

# Paths handed to loadData (only the training path is used in the visible code).
TrainDataPath = '../../vcc2016/TFrecords/Time/Train/'
TestDataPath = '../../vcc2016/TFrecords/Time/Test/'

# Hyper-parameters. dataSize, latentSize, hidNum and lamb are not referenced in the visible code.
dataSize = 513
latentSize = 64
speakerN = 10   # width of the `label` placeholder and 'speaker_dim' of the regressor
N = 500         # 'channel' of the gated CNN and width of the `latent` placeholder
L = 80          # kernel width of the gated CNN; each input row holds tstep*L values
tstep = 100
hidNum = 1000
lamb = 0
# Time the data loading.
tS = time.time()
trainData, Label = loadData(TrainDataPath, L, tstep)
# testData = loadData(TestDataPath)
tE = time.time()
print("loading data time: %f" % (tE-tS))

# Architecture descriptions passed to layer.gatedCNN and speakerReg.
CGNNarch = {'channel' : N, 'kernel': [1, L], 'stride': [1,1]}
Regarch = {'channel' : [16, 32], 'kernel': [[1, 512], [1, 3]], 'stride': [[1,250], [1,2]], 'speaker_dim': speakerN}

# Graph inputs: flattened source rows, speaker labels and latent codes.
source = tf.placeholder(tf.float32, shape = [None, tstep*L])
label = tf.placeholder(tf.float32, shape = [None, speakerN])
latent = tf.placeholder(tf.float32, shape = [None, N])
RegNet_en = speakerReg(Regarch, 'RegNet_en')
RegNet_de = speakerReg(Regarch, 'RegNet_de')

# Reshape each flat row to (tstep, L, 1) before the first gated CNN layer.
x = tf.reshape(source, [-1, tstep, L, 1])
GCNN_en1 = layer.gatedCNN(x, CGNNarch, 'GCNN_en1')
示例#17
0
    parser.add_argument('-train',type=str,help="-train dataset.csv path")
    parser.add_argument('-run',type=str,help="-run dataset.csv path")
    parser.add_argument('-model',type=str,help='-model model\'s path')
    parser.add_argument('-iterations',type=int,help='-iteration number of epoches')
    parser.add_argument('-finetune',type=str,help='-finetune base-model path')
    args = parser.parse_args()
    print(args)


    #Assembling Net:
    buildNet()
    #data loading:
    file_name = args.run if args.run is not None else args.train
    print("Loading data...",end="")
    d = open(file_name,'r')
    data,labels = util.loadData(d)
    data = util.reduceMatRows(data)
    labels,m1,m2 =util.reduceVector(labels,getVal=True)
    print("{} chunk loaded!\n".format(len(labels)),end="")

    if args.run is not None:
        #Loading weights
        w_name = args.model
        net.load_weights(w_name)
        epochs = "run"
        print("Starting main loop...")
        hip = 0
        reals,preds = [],[]

        for i in range(len(data)-40,len(data)):
            x = np.array(data[i]).reshape(1,12)
        if (i + 1) % 20 == 0:
            pred = np.concatenate(train_pred, axis=0)
            label = np.concatenate(train_label, axis=0)
            train_mae, train_rmse, train_mape, b = util.metric(pred, label)
            print("[epoch %d][%d/%d] loss: %.4f mae: %.4f rmse: %.4f " %
                  (epoch, i + 1, len(train_loader), loss.item(), train_mae,
                   train_rmse))

    train_mae, train_rmse, train_mape, b = util.metric(train_pred, train_label)

    return train_rmse, sum(epoch_loss)


if __name__ == '__main__':
    train_, val_, test_, test_time, A, mean, std, index = util.loadData(args)
    print(args)

    #train_loader
    train_loader = DataLoader(
        dataset=train_,
        batch_size=args.batch_size,
    )

    val_loader = DataLoader(
        dataset=val_,
        batch_size=args.batch_size,
    )

    test_loader = DataLoader(
        dataset=test_,
示例#19
0
def train(
  model,
  optimizer : optim,
  word_vocab: Vocabulary,
  char_vocab: Vocabulary,
  tag_vocab : Vocabulary,
  args,
):
  """Train ``model`` for ``args.epoch_size`` epochs, evaluating on dev after each.

  Every epoch visits the training data in a fresh random order in
  minibatches of ``args.batch_size``, printing loss and forward/backward
  timings per batch. After each epoch precision, recall and F1 are computed
  on the dev set; for an SGD optimizer the learning rate is reset to
  ``model.lr / (1 + 0.05 * epoch)``; and the parameters are saved to
  ``args.model_path`` (when it is not None) whenever F1 beats the best so
  far, which is tracked in ``model.f1``.
  """
  epoch_size = args.epoch_size
  batch_size = args.batch_size

  train_words, train_tags = loadData(args.train_path, word_vocab, tag_vocab, args.delimiter)
  dev_words, dev_tags = loadData(args.dev_path, word_vocab, tag_vocab, args.delimiter)

  train_size = len(train_words)
  device = torch.device(model._device)

  print(model)
  model.f1 = -1

  # Per-epoch dev scores.
  p_all, r_all, f1_all = [], [], []

  for epoch in range(1, epoch_size + 1):

    shuffled = np.random.permutation(train_size)
    epoch_loss = 0.

    model.train()

    for batch_no, begin in enumerate(range(0, train_size, batch_size), start=1):
      picked = shuffled[begin:begin + batch_size]

      # prepare minibatch
      picked_words = [train_words[i] for i in picked]
      picked_tags = [train_tags[i] for i in picked]
      (batch_words, batch_word_lens, batch_word_mask,
       batch_chars, batch_char_lens, batch_tags) = getMinibatch(
        picked_words, picked_tags, word_vocab, char_vocab, tag_vocab, device)

      # forward
      forward_began = time.time()
      loss = model(
        batch_words,
        batch_chars,
        batch_tags,
        tag_vocab,
        batch_word_mask,
        batch_word_lens,
        batch_char_lens,
      )
      forward_ended = time.time()

      # backward and update parameters
      backward_began = time.time()
      optimizer.zero_grad()
      loss.backward()
      if model.clipping is not None and model.clipping > 0:
        nn.utils.clip_grad_norm_(model.parameters(), model.clipping)
      optimizer.step()
      backward_ended = time.time()

      loss_value = loss.tolist()
      epoch_loss += loss_value

      print("epoch: {:>3d}, batch: {:>4d}, loss: {:10.4f}, forward: {: 2.2f}, backward: {: 2.2f}".format(
        epoch,
        batch_no,
        loss_value,
        forward_ended - forward_began,
        backward_ended - backward_began,
      ))

    print("finished epoch: {}, epoch loss: {}\n".format(epoch, epoch_loss))

    model.eval()

    # calc accuracy on dev
    p, r, f1 = calcAccuracy(model, word_vocab, char_vocab, tag_vocab, dev_words, dev_tags)
    p_all.append(p)
    r_all.append(r)
    f1_all.append(f1)

    # if the optimizer is SGD,
    #   then scheduling the initial learning rate by
    #   lr = initial_lr / ( 1.0 + 0.05 * epoch_number )
    if "SGD" in str(optimizer):
      lr = model.lr / (1.0 + 0.05 * epoch)
      for group in optimizer.param_groups:
        group["lr"] = lr
      print("set the learning rate of {}\n".format(lr))

    # save the model parameters when the dev F1 improved
    if model.f1 < f1 and args.model_path is not None:
      torch.save(model.state_dict(), args.model_path)
    model.f1 = max(model.f1, f1)

  print("best f1-score: {}".format(model.f1))

  """
示例#20
0
def main(dataFile):
    """Run the SVM experiment on the data set stored in *dataFile*.

    Loads color/depth features with their labels and person ids, builds
    several feature variants (color, depth, color+depth, shrunken depth),
    splits each variant with ``util.leaveOnePersonOut`` and finally fits an
    RBF SVM on the shrunken-depth features, printing the selected
    hyper-parameters and the test accuracy.

    The ``print`` calls use the single-argument parenthesised form so the
    function produces the same output on Python 2 and Python 3.
    """
    color, depth, labels, people = util.loadData(dataFile)
    # NOTE(review): the image is drawn but never shown/saved in this function.
    plt.imshow(np.reshape(depth[0, :], (128, 128)))

    n, colorFeatures = color.shape
    _, depthFeatures = depth.shape

    # Fixed random_state keeps the shuffle reproducible between runs.
    color, depth, labels, people = shuffle(color,
                                           depth,
                                           labels,
                                           people,
                                           random_state=0)

    XColorAndDepth = np.concatenate((color, depth), axis=1)
    XDepth = depth
    XColor = color
    XShrunkDepth = util.resizeImages(128, 0.2, depth)
    print(XShrunkDepth.shape)
    y = labels

    # The color / depth / color+depth splits are only consumed by the
    # disabled experiments below; they are kept so those can be re-enabled.
    XTrainColor, XTestColor, yTrainColor, yTestColor, peopleTrainColor, peopleTestColor = util.leaveOnePersonOut(
        3, XColor, y, people)

    XTrainDepth, XTestDepth, yTrainDepth, yTestDepth, peopleTrainDepth, peopleTestDepth = util.leaveOnePersonOut(
        3, XDepth, y, people)

    XTrainColorAndDepth, XTestColorAndDepth, yTrainColorAndDepth, yTestColorAndDepth, peopleTrainColorAndDepth, peopleTestColorAndDepth = util.leaveOnePersonOut(
        3, XColorAndDepth, y, people)

    XTrainShrunkDepth, XTestShrunkDepth, yTrainShrunkDepth, yTestShrunkDepth, peopleTrainShrunkDepth, peopleTestShrunkDepth = util.leaveOnePersonOut(
        3, XShrunkDepth, y, people)

    # --- Disabled experiments (same select / fit / predict / score pattern) ---

    # print("Selecting linear parameters for just color")
    # c = selectParamLinear(XTrainColor, yTrainColor, peopleTrainColor)
    # clf = SVC(kernel='linear', C=c)
    # clf.fit(XTrainColor, yTrainColor)
    # yPred = clf.predict(XTestColor)
    # score = metrics.accuracy_score(yTestColor, yPred)
    # print("Selected C = " + str(c) + ", accuracy = " + str(score))

    # print("Selecting linear parameters for just depth")
    # c = selectParamLinear(XTrainDepth, yTrainDepth, peopleTrainDepth)
    # clf = SVC(kernel='linear', C=c)
    # clf.fit(XTrainDepth, yTrainDepth)
    # yPred = clf.predict(XTestDepth)
    # score = metrics.accuracy_score(yTestDepth, yPred)
    # print("Selected C = " + str(c) + ", accuracy = " + str(score))

    # print("Selecting linear parameters for color and depth")
    # c = selectParamLinear(XTrainColorAndDepth, yTrainColorAndDepth, peopleTrainColorAndDepth)
    # clf = SVC(kernel='linear', C=c)
    # clf.fit(XTrainColorAndDepth, yTrainColorAndDepth)
    # yPred = clf.predict(XTestColorAndDepth)
    # score = metrics.accuracy_score(yTestColorAndDepth, yPred)
    # print("Selected C = " + str(c) + ", accuracy = " + str(score))

    # print("Selecting rbf parameters for just color")
    # gamma, c = selectParamRBF(XTrainColor, yTrainColor, peopleTrainColor)
    # clf = SVC(kernel='rbf', C=c, gamma=gamma)
    # clf.fit(XTrainColor, yTrainColor)
    # yPred = clf.predict(XTestColor)
    # score = metrics.accuracy_score(yTestColor, yPred)
    # print("Selected C = " + str(c) + ", gamma = " + str(gamma) + ", accuracy = " + str(score))

    # print("Selecting rbf parameters for just depth")
    # gamma, c = selectParamRBF(XTrainDepth, yTrainDepth, peopleTrainDepth)
    # clf = SVC(kernel='rbf', C=c, gamma=gamma)
    # clf.fit(XTrainDepth, yTrainDepth)
    # yPred = clf.predict(XTestDepth)
    # score = metrics.accuracy_score(yTestDepth, yPred)
    # print("Selected C = " + str(c) + ", gamma = " + str(gamma) + ", accuracy = " + str(score))

    # --- Active experiment: RBF SVM on the shrunken depth images ---
    print("Selecting rbf parameters for just shrunken depth")
    gamma, c = selectParamRBF(XTrainShrunkDepth, yTrainShrunkDepth,
                              peopleTrainShrunkDepth)
    clf = SVC(kernel='rbf', C=c, gamma=gamma)
    clf.fit(XTrainShrunkDepth, yTrainShrunkDepth)
    yPred = clf.predict(XTestShrunkDepth)
    score = metrics.accuracy_score(yTestShrunkDepth, yPred)
    print("Selected C = " + str(c) + ", gamma = " + str(gamma) +
          ", accuracy = " + str(score))
示例#21
0
        else:
            print("                             /**testation info**/")
            print("----avarage test loss:", self.test_loss)
            print("PW:")
            print("----avarage accuracy:", self.test_accuracy_pw)
            # print("----avarage f1-Score of N:", self.test_f1_pw[0])
            print("----avarage f1-Score of B:", self.test_f1_pw[1])
            print("PPH:")
            print("----avarage accuracy :", self.test_accuracy_pph)
            # print("----avarage f1-Score of N:", self.test_f1_pph[0])
            print("----avarage f1-Score of B:", self.test_f1_pph[1])
            # print("IPH:")
            # print("----avarage accuracy:", self.test_accuracy_iph)
            # print("----avarage f1-Score of N:", self.test_f1_1_iph)
            # print("----avarage f1-Score of B:", self.test_f1_2_iph)


# train && test
if __name__ == "__main__":
    # Load the data: six arrays each for the train, validation and test splits.
    print("Loading Data...")
    (
        X_train, y_train, len_train, pos_train, length_train, position_train,
        X_valid, y_valid, len_valid, pos_valid, length_valid, position_valid,
        X_test, y_test, len_test, pos_test, length_test, position_test,
    ) = util.loadData()

    # print("Run Model...\n\n\n")
    model = BiLSTM()

    # Positional arguments for fit(): the three splits in order, followed by
    # the two trailing arguments "test" and False.
    train_split = (X_train, y_train, len_train, pos_train, length_train,
                   position_train)
    valid_split = (X_valid, y_valid, len_valid, pos_valid, length_valid,
                   position_valid)
    test_split = (X_test, y_test, len_test, pos_test, length_test,
                  position_test)
    fit_args = train_split + valid_split + test_split + ("test", False)
    model.fit(*fit_args)
示例#22
0
    def SGD(self, X, Y, startLearningRate, miniBatchFraction, epoch, keepProb):
        """
        使用梯度下降法训练模型

        Parameters
        ----------
        X : np.array
            自变量.
        Y : np.array
            因变量.
        startLearningRate : TYPE
            DESCRIPTION.
        miniBatchFraction : TYPE
            DESCRIPTION.
        epoch : TYPE
            DESCRIPTION.
        keepProb : TYPE
            DESCRIPTION.

        Returns
        -------
        None.

        """
        summary = tf.summary.merge_all()
        trainStep = tf.Variable(0)
        learningRate = tf.train.exponential_decay(startLearningRate,
                                                  trainStep,
                                                  1000,
                                                  0.96,
                                                  staircase=True)
        method = tf.train.GradientDescentOptimizer(learningRate)
        optimizer = method.minimize(self.loss, global_step=trainStep)
        batchSize = int(X.shape[0] * miniBatchFraction)
        batchNum = int(np.ceil(1 / miniBatchFraction))
        sess = tf.Session()
        self.sess = sess
        init = tf.global_variables_initializer()
        sess.run(init)
        summary_writer = tf.summary.FileWriter(self.logPath,
                                               graph=tf.get_default_graph())
        step = 0
        while (step < epoch):
            for i in range(batchNum):
                batchX = X[i * batchSize:(i + 1) * batchSize]
                batchY = Y[i * batchSize:(i + 1) * batchSize]
                sess.run(
                    [optimizer],
                    feed_dict={
                        self.input: batchX,
                        self.label: batchY,
                        self.keepProb: keepProb
                    })
                step += 1
                #评估模型效果,将日志写入文件
                self.evaluation(step)
                summary_str = sess.run(summary,
                                       feed_dict={
                                           self.input: X,
                                           self.label: Y,
                                           self.keepProb: 1.0
                                       })
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
            return self

        def fit(self,
                startLearningRate=0.1,
                miniBatchFraction=0.01,
                epoch=200,
                keepProb=0.7):
            """
            训练模型
            """

            X = self.trainSet["X"]
            Y = self.trainSet["Y"]
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, X.shape[1]],
                                        name="X")
            self.label = tf.placeholder(tf.int64,
                                        shape=[None, self.size[-1]],
                                        name="Y")
            self.keepProb = tf.placeholder(tf.float32)
            self.defineANN()
            self.defineLoss()
            self.SGD(X, Y, startLearningRate, miniBatchFraction, epoch,
                     keepProb)

        def predict_proba(self, X):
            """
            使用神经网络对未知数据进行预测

            Parameters
            ----------
            X : TYPE
                DESCRIPTION.

            Returns
            -------
            None.

            """
            sess = self.sess
            pred = tf.nn.softmax(logits=self.out, name="pred")
            prob = sess.run(pred,
                            feed_dict={
                                self.input: X,
                                self.keepProb: 1.0
                            })
            return prob

        if __name__ == "__main__":
            data = loadData()
            trainData, validationData, trainLabel, validationLabel = train_test_split(
                data[0], data[1], test_size=0.3, random_state=1001)
            trainSet = {"X": trainData, "Y": trainLabel}
            validationSet = {"X": validationData, "Y": validationLabel}
            testSet = {"X": data[2], "Y": data[3]}
            #windows与Linu的储存路径不同
            if os.name == "nt":
                ann = ANN([30, 20, 10], "logs\\mnist", trainSet, validationSet,
                          testSet)
            else:
                ann = ANN([30, 20, 10], "logs/mnist", trainSet, validationSet,
                          testSet)
            ann.fit()
示例#23
0
from util import loadData
from biterm import Biterm
import numpy as np
import preprocess, time, pickle
# Input data set; the commented lines are alternative inputs.
#file_name = '../Data/testdata.manualSUBSET.2009.06.14.csv'
#file_name = '../Data/training.1600000.processed.noemoticon.csv'
file_name = '../Data/train-PROCESSED-FINAL.csv'
#file_name = '../Data/train-PROCESSED.csv'

tweets = loadData(file_name)
tweets = preprocess.splitWords(tweets)

# Model settings passed to Biterm in this order.
# NOTE(review): a and b are presumably the model's prior hyper-parameters --
# confirm against the Biterm class.
number_of_topics = 100
a = 100.0 / number_of_topics
b = 0.001
max_iter = 200
mdl = 16

bt = Biterm(a, b, number_of_topics, max_iter, mdl)

# Time the fit and report the elapsed seconds.
start = time.time()
bt.fit(tweets)
end = time.time()
print(end - start)

bt.showTopics(10)
phi, theta = bt.getParams()

# Persist the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling fails.
file_name = "model" + str(mdl) + ".pkl"
with open(file_name, "wb") as model_file:
    pickle.dump(bt, model_file)
示例#24
0
    data = []
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            line = eval(line)
            line = [str(i) for i in line]
            data.append(line)
    return data


def loadDevData(path):
    """Load the records at *path* and return their 'text' fields as lists.

    Each record's ``'text'`` value is expanded with ``list(...)`` (one entry
    per character when the value is a string); the per-record lists are
    returned in the order ``loadData`` yields the records.
    """
    return [list(record['text']) for record in loadData(path)]


if __name__ == '__main__':
    # Tag set handed to reduce() for every record.
    labels = ['O', 'B-ment', 'I-ment']
    data = loadData('data/train_pre.json')

    # One entry of reduce()'s result per line, with a blank line after
    # each record.
    with open('data/train_text.txt', 'w', encoding='utf-8') as out:
        for record in tqdm(data):
            rows = reduce(record, labels)
            out.write('\n'.join(rows) + '\n\n')

    # Manual check of the generated file:
    # train_x, train_y = get_train_data('data/train_text.txt')
    # print(train_x)
    # print(train_y)
    # print(predict_reduce(train_x[0], train_y[0],labels))
示例#25
0
from util import loadData
from biterm import Biterm
import numpy as np
import preprocess, time, pickle, sys
import unicodecsv as csv
# Input / output data sets; the commented lines are alternative inputs.
#file_name = '../Data/testdata.manualSUBSET.2009.06.14.csv'
# file_name = '../Data/training.1600000.processed.noemoticon.csv'
file_name = '../Data/train-AGGREGATED.csv'
out_file_name = '../Data/train-PROCESSED-FINAL.csv'

tweets = loadData(file_name)
# Optional first command-line argument: only process that many tweets.
if len(sys.argv) > 1:
    tweets = tweets[:int(sys.argv[1])]
# tweets = [tweets[i] for i in np.random.permutation(len(tweets))]

print('Starting preprocessing')
tweets_dict = preprocess.preprocess(tweets)
# Largest row index that survived preprocessing; rows past it are skipped.
max_idx = max(tweets_dict)

# Copy the input rows whose index is in tweets_dict, replacing the last
# column with the preprocessed tokens joined by spaces.
with open(file_name, 'rb') as csvfile, open(out_file_name, 'wb') as outfile:
    reader = csv.reader(csvfile)
    writer = csv.writer(outfile)
    for i, row in enumerate(reader):
        if i > max_idx:
            break
        if i not in tweets_dict:
            continue

        row[-1] = ' '.join(tweets_dict[i])
        writer.writerow(row)
示例#26
0
def loadDevData(path):
    """Load the records at *path* and return their 'text' fields as lists.

    Each record's ``'text'`` value is expanded with ``list(...)`` (one entry
    per character when the value is a string); the per-record lists are
    returned in the order ``loadData`` yields the records.
    """
    return [list(record['text']) for record in loadData(path)]
示例#27
0
                                    shape=[None, Y.shape[1]],
                                    name="Y")
        self.keepProb = tf.placeholder(tf.float32)
        self.defineCNN()
        self.defineLoss()
        self.SGD(X, Y, startLearningRate, miniBatchFraction, epoch, keepProb)

    def predict_proba(self, X):
        """
        Predict class probabilities for unseen data with the neural network.

        Runs the softmax of ``self.out`` in the stored session, feeding ``X``
        as the input and a keep probability of 1.0.
        """
        session = self.sess
        softmax_op = tf.nn.softmax(logits=self.out, name="pred")
        feed = {self.input: X, self.keepProb: 1.0}
        return session.run(softmax_op, feed_dict=feed)


if __name__ == "__main__":
    data = loadData()
    # Hold out 30% of the training data for validation (fixed seed).
    trainData, validationData, trainLabel, validationLabel = train_test_split(
        data[0], data[1], test_size=0.3, random_state=1001)
    trainSet = {"X": trainData, "Y": trainLabel}
    validationSet = {"X": validationData, "Y": validationLabel}
    testSet = {"X": data[2], "Y": data[3]}
    # os.path.join picks the separator of the running platform, so Windows
    # and Linux no longer need separate branches.
    ann = CNN(os.path.join("logs", "mnist_cnn"), trainSet, validationSet,
              testSet)
    ann.fit()
示例#28
0
def select_train(train_name):
    """Map a classifier name to a ``(train_function, k)`` pair.

    'SVM' and 'J48' return their trainer with ``k == 0`` (0 means KinKNN is
    not applicable); '1NN', '3NN' and '5NN' return ``train_knn`` with ``k``
    taken from the leading digit. Any other name yields ``None``.
    """
    if train_name == 'SVM':
        return train_svm, 0  # 0 means KinKNN is not applicable
    if train_name == 'J48':
        return train_tree, 0
    if train_name in ('1NN', '3NN', '5NN'):
        neighbours = int(train_name[0])
        return train_knn, neighbours
    return None


# Validate the optimal feature subset by computing the resulting accuracy.
def check(trainX, trainY, predictX, predictY, optimal_feature_subset, feature, trainSelect, KinKNN):
    """Train and evaluate using only the features chosen by the subset.

    ``optimal_feature_subset`` and ``feature`` are turned into a feature list
    by ``numtofea``; both the training and prediction data are projected onto
    that list with ``read_data_fea`` and handed to ``trainSelect`` together
    with the labels and ``KinKNN``. Returns whatever ``trainSelect`` returns.
    """
    chosen_features = numtofea(optimal_feature_subset, feature)
    train_view = read_data_fea(chosen_features, trainX)
    predict_view = read_data_fea(chosen_features, predictX)
    return trainSelect(train_view, trainY, predict_view, predictY, KinKNN)


if __name__ == '__main__':
    trainX, trainY, predictX, predictY, loop_condition, initialization_parameters = util.loadData('heart', 1, 2)
    num_fea_original = mat(trainX).shape[1]  # number of features
    # Indices of the original features: 0 .. num_fea_original - 1.
    feature = list(range(num_fea_original))
    trainName = 'J48'
    # select_train returns a (trainer, k) pair. Passing the pair itself as
    # the trainer made check() fail when it tried to call a tuple, so unpack
    # it and forward the k that belongs to the chosen trainer.
    trainSelect, KinKNN = select_train(trainName)
    accuracy = check(trainX, trainY, predictX, predictY, [1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0], feature, trainSelect,
                     KinKNN)
    # Single-argument print keeps the output identical on Python 2 and 3.
    print(trainName + '验证准确率:' + ' ' + str(accuracy))