Example #1
# NOTE: this excerpt relies on module-level imports and helpers defined elsewhere
# in the project (np, torch, F, wave, copy, sys, hp, DEVICE, load_dat,
# frame_stacking, onehot, onehot_dist, sort_pad, label_smoothing_loss).
def train_loop(model, optimizer, train_set, epoch=0, scheduler=None):
    num_mb = len(train_set) // hp.batch_size

    if scheduler:
        scheduler.step(epoch)

    for i in range(num_mb):
        # input lmfb (B x T x (F x frame_stacking))
        xs = []
        # target symbols
        ts = []
        # onehot vector of target symbols (B x L x NUM_CLASSES)
        ts_onehot = []
        # vector of target symbols for label smoothing (B x L x NUM_CLASSES)
        ts_onehot_LS = []
        # emotion labels, their onehot vectors, and label-smoothed onehot vectors
        emo = []
        emo_onehot = []
        emo_onehot_LS = []

        # input lengths
        lengths = []
        # target lengths
        ts_lengths = []
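        # un-stacked feature copies and their lengths (only needed for the
        # 'lim_BLSTM' baseline; otherwise overwritten with xs / lengths below)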
        temp = []
        temp_length = []
        for j in range(hp.batch_size):
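            # each line is "<feature file> <label ids>" for ASR, or tab-separated
            # "<feature file>\t<label ids>\t<emotion id>[\t<emotion distribution>]"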
            s = train_set[i * hp.batch_size + j].strip()
            if hp.ASR:
                x_file, laborg = s.split(' ', 1)
            elif hp.dist:
                x_file, laborg, labemo, labdist = s.split('\t')
                laborg = laborg.strip()
                labemo = labemo.strip()
                labdist = labdist.strip()
            else:
                x_file, laborg, labemo = s.split('\t')
                laborg = laborg.strip()
                labemo = labemo.strip()
                #if len(laborg) == 0:
                #    laborg = "2 0 1"

            # load acoustic features: HTK binary, pre-computed .npy, or raw .wav
            if '.htk' in x_file:
                #mean = np.load("/n/work1/feng/src/htk/mean.npy")
                #var = np.load("/n/work1/feng/src/htk/var.npy")
                cpudat = load_dat(x_file)
                cpudat = cpudat[:, :hp.lmfb_dim]
                #cpudat = (cpudat-mean)/var
                #print(mean)
            elif '.npy' in x_file:
                #mean = np.load("/n/work1/feng/data/swb/mean.npy")
                #var = np.load("/n/work1/feng/data/swb/var.npy")
                cpudat = np.load(x_file)
                #cpudat = (cpudat-mean)/var
            elif '.wav' in x_file:
                with wave.open(x_file) as wf:
                    dat = wf.readframes(wf.getnframes())
                    y = np.frombuffer(dat, dtype=np.int16)[:, np.newaxis]
                    y_float = y.astype(np.float32)
                    # zero-mean, unit-variance normalization of the raw waveform
                    cpudat = (y_float - np.mean(y_float)) / np.std(y_float)

            tmp = copy.deepcopy(cpudat)
            print("{} {}".format(x_file, cpudat.shape[0]))
            if hp.frame_stacking > 1 and hp.encoder_type != 'Wave':
                cpudat, newlen = frame_stacking(cpudat, hp.frame_stacking)

            newlen = cpudat.shape[0]
            if hp.encoder_type == 'CNN':
                cpudat_split = np.split(cpudat, 3, axis=1)
                cpudat = np.hstack((cpudat_split[0].reshape(newlen, 1, 80),
                                    cpudat_split[1].reshape(newlen, 1, 80),
                                    cpudat_split[2].reshape(newlen, 1, 80)))
            newlen = cpudat.shape[0]
            lengths.append(newlen)
            xs.append(cpudat)
            temp.append(tmp)
            temp_length.append(tmp.shape[0])

            cpulab = np.array([int(c) for c in laborg.split(' ')],
                              dtype=np.int32)
            #print(cpulab)

            cpulab_onehot = onehot(cpulab, hp.num_classes)
            ts.append(cpulab)
            ts_lengths.append(len(cpulab))
            ts_onehot.append(cpulab_onehot)
            ts_onehot_LS.append(0.9 * cpulab_onehot +
                                0.1 * 1.0 / hp.num_classes)
            if hp.dist and not hp.ASR:
                cpuemo = np.array([int(x) for x in labemo], dtype=np.int32)
                emotion_onehot = onehot_dist(labdist, hp.num_emotion)
                emo_onehot.append(emotion_onehot)
                emo_onehot_LS.append(0.9 * emotion_onehot +
                                     0.1 * 1.0 / hp.num_emotion)
                emo.append(cpuemo)
            elif not hp.ASR:
                cpuemo = np.array([int(x) for x in labemo], dtype=np.int32)
                emotion_onehot = onehot(cpuemo, hp.num_emotion)
                emo_onehot.append(emotion_onehot)
                emo_onehot_LS.append(0.9 * emotion_onehot +
                                     0.1 * 1.0 / hp.num_emotion)
                emo.append(cpuemo)

        # every baseline except 'lim_BLSTM' consumes the (possibly stacked) xs directly
        if hp.baseline_type != 'lim_BLSTM':
            temp, temp_length = xs, lengths

        if hp.ASR:
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths = sort_pad(
                hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS,
                ts_lengths)

            youtput_in_Variable = model(xs, lengths, ts_onehot, [], [])

            loss = 0.0
            if hp.decoder_type == 'Attention':
                for k in range(hp.batch_size):
                    num_labels = ts_lengths[k]
                    loss += label_smoothing_loss(
                        youtput_in_Variable[k][:num_labels],
                        ts_onehot_LS[k][:num_labels], 1) / num_labels
            print('loss = {}'.format(loss.item()))
        elif hp.baseline:
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, temp = sort_pad(
                hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS,
                ts_lengths, emo, emo_onehot, emo_onehot_LS, temp, temp_length)

            if hp.baseline_type == 'CNN_BLSTM' or hp.baseline_type == 'lim_BLSTM':
                # pad or truncate the batch-padded features to a fixed 750 frames
                feat_dim = temp.size(2)
                xs_new = torch.zeros((hp.batch_size, 750, feat_dim))
                feature_length = min(temp.size(1), 750)
                xs_new[:, :feature_length, :] = temp[:, :feature_length, :]
                emotion_in_Variable = model(xs_new.to(DEVICE), [])
            else:
                #youtput_in_Variable, emotion_in_Variable = model(xs, lengths, ts_onehot, emo_onehot, [])
                emotion_in_Variable = model(xs, lengths)

            loss = 0.0
            if hp.decoder_type == 'Attention':
                #print(emo)
                #print(emotion_in_Variable[:,:hp.num_emotion])
                loss += F.cross_entropy(
                    emotion_in_Variable[:, :hp.num_emotion], emo.to(DEVICE))
                #for k in range(hp.batch_size):
                #num_labels = ts_lengths[k]
                #loss += label_smoothing_loss(youtput_in_Variable[k][:num_labels], ts_onehot_LS[k][:num_labels],1) / num_labels
                #print(emotion_in_Variable[k][:hp.num_emotion])
                #loss += F.cross_entropy(emotion_in_Variable[k][:hp.num_emotion], emo)
            print('loss = {}'.format(loss.item()))
        elif hp.text_based:
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, temp = sort_pad(
                hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS,
                ts_lengths, emo, emo_onehot, emo_onehot_LS, temp, temp_length)

            emotion_in_Variable = model(ts.to(DEVICE), ts_lengths.to(DEVICE))

            loss = 0.0
            if hp.decoder_type == 'Attention':
                #print(emo)
                #print(emotion_in_Variable[:,:hp.num_emotion])
                loss += F.cross_entropy(
                    emotion_in_Variable[:, :hp.num_emotion], emo.to(DEVICE))
                #for k in range(hp.batch_size):
                #num_labels = ts_lengths[k]
                #loss += label_smoothing_loss(youtput_in_Variable[k][:num_labels], ts_onehot_LS[k][:num_labels],1) / num_labels
                #print(emotion_in_Variable[k][:hp.num_emotion])
                #loss += F.cross_entropy(emotion_in_Variable[k][:hp.num_emotion], emo)
            print('loss = {}'.format(loss.item()))
        elif hp.combined:
            #seq1 = []
            #seq2 = []
            #seq1, seq2, xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, \
            #xs1, lengths1, ts1, ts_onehot1, ts_onehot_LS1, ts_lengths1, emo1, emo_onehot1, emo_onehot_LS1 \
            #= sort_pad(hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS)
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, temp = sort_pad(
                hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS,
                ts_lengths, emo, emo_onehot, emo_onehot_LS, temp, temp_length)

            if hp.baseline_type == 'CNN_BLSTM' or hp.baseline_type == 'lim_BLSTM':
                # pad or truncate the batch-padded features to a fixed 750 frames
                feat_dim = temp.size(2)
                xs_new = torch.zeros((hp.batch_size, 750, feat_dim))
                feature_length = min(temp.size(1), 750)
                xs_new[:, :feature_length, :] = temp[:, :feature_length, :]
                emotion_in_Variable = model(xs_new.to(DEVICE),
                                            [], ts.to(DEVICE),
                                            ts_lengths.to(DEVICE))
            else:
                emotion_in_Variable = model(xs.to(DEVICE), lengths,
                                            ts.to(DEVICE),
                                            ts_lengths.to(DEVICE))

            loss = 0.0
            if hp.decoder_type == 'Attention':
                #print(emo)
                #print(emotion_in_Variable[:,:hp.num_emotion])
                #for i in range(hp.batch_size):
                #    for j in range(hp.batch_size):
                #        if seq1[j] == i:
                #            break
                #    temp = emo[i]
                #    emo[i] = emo[j]
                #    emo[j] = temp
                loss += F.cross_entropy(
                    emotion_in_Variable[:, :hp.num_emotion], emo.to(DEVICE))
                #for k in range(hp.batch_size):
                #num_labels = ts_lengths[k]
                #loss += label_smoothing_loss(youtput_in_Variable[k][:num_labels], ts_onehot_LS[k][:num_labels],1) / num_labels
                #print(emotion_in_Variable[k][:hp.num_emotion])
                #loss += F.cross_entropy(emotion_in_Variable[k][:hp.num_emotion], emo)
            print('loss = {}'.format(loss.item()))
        elif hp.combined_ASR or hp.ASR_based:
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, temp = sort_pad(
                hp.batch_size, xs, lengths, ts, ts_onehot, ts_onehot_LS,
                ts_lengths, emo, emo_onehot, emo_onehot_LS, temp, temp_length)

            if hp.baseline_type == 'CNN_BLSTM' or hp.baseline_type == 'lim_BLSTM':
                # pad or truncate the batch-padded features to a fixed 750 frames
                feat_dim = temp.size(2)
                xs_new = torch.zeros((hp.batch_size, 750, feat_dim))
                feature_length = min(temp.size(1), 750)
                xs_new[:, :feature_length, :] = temp[:, :feature_length, :]
                youtput_in_Variable, emotion_in_Variable = model(
                    xs, lengths, ts_onehot, emo_onehot, xs_new.to(DEVICE))
            else:
                youtput_in_Variable, emotion_in_Variable = model(
                    xs, lengths, ts_onehot, emo_onehot, [])

            loss = 0.0
            if hp.decoder_type == 'Attention':
                #print(emo)
                #print(emotion_in_Variable[:,:hp.num_emotion])
                loss += F.cross_entropy(
                    emotion_in_Variable[:, :hp.num_emotion],
                    emo.to(DEVICE)) * 0.8
                print(loss)
                for k in range(hp.batch_size):
                    num_labels = ts_lengths[k]
                    loss += label_smoothing_loss(
                        youtput_in_Variable[k][:num_labels],
                        ts_onehot_LS[k][:num_labels], 1) / num_labels * 0.2
                    #print(emotion_in_Variable[k][:hp.num_emotion])
                    #loss += F.cross_entropy(emotion_in_Variable[k][:hp.num_emotion], emo)
            print('loss = {}'.format(loss.item()))

        sys.stdout.flush()
        optimizer.zero_grad()
        # backward
        loss.backward()
        clip = 1.0
        torch.nn.utils.clip_grad_value_(model.parameters(), clip)
        # optimizer update
        optimizer.step()
        loss = loss.detach()  # drop the graph reference before the next minibatch
        torch.cuda.empty_cache()
Example #2
            while line:
                if "Ses0" not in line:
                    line = f.readline()
                    continue
                x_file, _, sentence = line.split(' ', 2)
                sentence = sentence.translate(table)
                temp = x_file
                if temp not in name_list:
                    #print(temp)
                    line = f.readline()
                    continue
                else:
                    ind_emo = name_list.index(temp)

                position = htkpos + x_file + ".htk"
                cpudat = load_dat(position)
                cpudat = cpudat[:, :40]
                if cpudat.shape[0] > 2000:
                    # skip utterances longer than 2000 frames
                    line = f.readline()
                    string_emotion = emotion_list[ind_emo]
                    delete[int(string_emotion)] += 1
                    continue

                transcripts = htkpos + x_file + ".htk\t"
                word_list = sentence.strip().split(' ')
                #print(word_list)
                i = 0
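                # symbol id "2" appears to act as a sentence-start marker in this
                # label scheme (cf. the "2 0 1" placeholder elsewhere in these scripts)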
                transcripts += "2 "
                for word in word_list:
                    word = word.strip()
Example #3
def test_loop(model, test_set):
    batch_size = 1
    #mean = np.load("/n/work1/feng/src/htk/mean.npy")
    #var = np.load("/n/work1/feng/src/htk/var.npy")
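    # running per-class counters over the 4 emotion classes
    # (neutral / positive / negative / ang(ry)), plus a 4x4 confusion matrix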
    acc = 0
    neutral = 0
    positive = 0
    negative = 0
    ang = 0
    ang_total = 0
    neutral_total = 0
    positive_total = 0
    negative_total = 0
    total = 0
    confusion_matrix = np.zeros((4, 4))
    for i in range(len(test_set)):
        # input lmfb (B x T x (F x frame_stacking))
        xs = []
        # target symbols
        ts = []
        # onehot vector of target symbols (B x L x NUM_CLASSES)
        ts_onehot = []
        # vector of target symbols for label smoothing (B x L x NUM_CLASSES)
        ts_onehot_LS = []
        # emotion labels, their onehot vectors, and label-smoothed onehot vectors
        emo = []
        emo_onehot = []
        emo_onehot_LS = []

        # input lengths
        lengths = []
        # target lengths
        ts_lengths = []
        # input lmfb (B x T x (F x frame_stacking))
        xs1 = []
        # target symbols
        ts1 = []
        # onehot vector of target symbols (B x L x NUM_CLASSES)
        ts_onehot1 = []
        # vector of target symbols for label smoothing (B x L x NUM_CLASSES)
        ts_onehot_LS1 = []
        # emotion labels (second copy, unused in this excerpt)
        emo1 = []
        emo_onehot1 = []
        emo_onehot_LS1 = []

        lengths1 = []
        ts_lengths1 = []
        temp = []
        temp_length = []

        for j in range(batch_size):
            s = test_set[i * batch_size + j].strip()
            #if hp.ASR:
            #    x_file = s.strip()
            #else:
            #x_file, laborg = s.split(' ', 1)
            if hp.ASR:
                x_file, laborg = s.split(' ', 1)
            elif hp.dist:
                x_file, laborg, labemo, labdist = s.split('\t')
                laborg = laborg.strip()
                labemo = labemo.strip()
                labdist = labdist.strip()
            else:
                x_file, laborg, labemo = s.split('\t')
                laborg = laborg.strip()
                labemo = labemo.strip()
                #if len(laborg) == 0:
                #    laborg = "2 0 1"
            if '.htk' in x_file:
                cpudat = load_dat(x_file)
                cpudat = cpudat[:, :hp.lmfb_dim]
                #cpudat = (cpudat-mean)/var
            elif '.npy' in x_file:
                cpudat = np.load(x_file)
                #cpudat = (cpudat-mean)/var
            elif '.wav' in x_file:
                with wave.open(x_file) as wf:
                    dat = wf.readframes(wf.getnframes())
                    y = np.frombuffer(dat, dtype=np.int16)[:, np.newaxis]
                    y_float = y.astype(np.float32)
                    cpudat = (y_float - np.mean(y_float)) / np.std(y_float)

            tmp = copy.deepcopy(cpudat)
            print(x_file, end='\t')
            if hp.frame_stacking > 1 and hp.encoder_type != 'Wave':
                cpudat, newlen = frame_stacking(cpudat, hp.frame_stacking)

            newlen = cpudat.shape[0]
            if hp.encoder_type == 'CNN':
                cpudat_split = np.split(cpudat, 3, axis=1)
                cpudat = np.hstack((cpudat_split[0].reshape(newlen, 1, 80),
                                    cpudat_split[1].reshape(newlen, 1, 80),
                                    cpudat_split[2].reshape(newlen, 1, 80)))
            newlen = cpudat.shape[0]
            lengths.append(newlen)
            xs.append(cpudat)
            temp.append(tmp)
            temp_length.append(tmp.shape[0])

            if not hp.ASR:
                cpuemo = np.array([int(x) for x in labemo], dtype=np.int32)
                emotion_onehot = onehot(cpuemo, hp.num_emotion)
                emo_onehot.append(emotion_onehot)
                emo_onehot_LS.append(0.9 * emotion_onehot +
                                     0.1 * 1.0 / hp.num_emotion)
                emo.append(cpuemo)
                cpulab = np.array([int(i) for i in laborg.split(' ')],
                                  dtype=np.int32)
                cpulab_onehot = onehot(cpulab, hp.num_classes)
                ts.append(cpulab)
                ts_lengths.append(len(cpulab))
                ts_onehot.append(cpulab_onehot)
                ts_onehot_LS.append(0.9 * cpulab_onehot +
                                    0.1 * 1.0 / hp.num_classes)

        if hp.baseline_type != 'lim_BLSTM':
            temp, temp_length = xs, lengths

        if hp.ASR:
            xs, lengths, temp = sort_pad(1,
                                         xs,
                                         lengths,
                                         temp=temp,
                                         temp_length=temp_length)
        else:
            xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo, emo_onehot, emo_onehot_LS, temp = sort_pad(
                1, xs, lengths, ts, ts_onehot, ts_onehot_LS, ts_lengths, emo,
                emo_onehot, emo_onehot_LS, temp, temp_length)

        if hp.baseline_type == 'CNN_BLSTM' or hp.baseline_type == 'lim_BLSTM':
            # pad or truncate the features to a fixed 750 frames
            # (later decode branches assume xs_new was built here)
            feat_dim = temp.size(2)
            xs_new = torch.zeros((1, 750, feat_dim))
            feature_length = min(temp.size(1), 750)
            xs_new[:, :feature_length, :] = temp[:, :feature_length, :]
        if hp.ASR:
            results = model.decode(xs, lengths, [])
            for character in results:
                print(character, end=' ')
            if not results:
                print("2 1", end=' ')
            print("\t", end='')
            print(labemo)
            #print()
            sys.stdout.flush()
        elif hp.baseline or hp.combined or hp.text_based:
            if hp.baseline:
                emotion = model.decode(xs_new.to(DEVICE), [])
            elif hp.text_based:
                emotion = model.decode(ts.to(DEVICE), ts_lengths.to(DEVICE))
            elif hp.combined:
                #emotion_in_Variable = model(xs, lengths, ts.to(DEVICE), ts_lengths.to(DEVICE), seq1, seq2)
                emotion = model.decode(xs_new.to(DEVICE), ts.to(DEVICE),
                                       ts_lengths.to(DEVICE))
                #emotion = model.decode(xs, lengths, ts1.to(DEVICE), ts_lengths1.to(DEVICE))

            print(int(labemo.strip()), end='\t')
            print((emotion), end='\t')
            print()
        else:
            results, emotion = model.decode(xs, lengths, xs_new.to(DEVICE))
            for character in results:
                print(character, end=' ')
            if not results:
                print("2 1", end=' ')
            print('\t', end='')
            print(int(labemo.strip()), end='\t')

            print(emotion, end='\t')
            print()
            sys.stdout.flush()
Example #4
        x = self.denseb(x)
        x = self.densec(x)

        return x


def preprocess(x_batch, y_batch):
    x_batch = tf.cast(x_batch, dtype=tf.float32) / 255. - 0.5
    y_batch = tf.cast(y_batch, dtype=tf.int32)
    return x_batch, y_batch


batch_size = 32
epochs = 200

x_train, x_test, y_train, y_test = utils.load_dat()

number_samples = y_test.shape[0]

# print(x_train.dtype,x_test.dtype,y_train.dtype,y_test.dtype)
# print(y_train.shape)

train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_set = train_set.map(preprocess).batch(batch_size)

val_set = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_set = val_set.map(preprocess).batch(batch_size)

optis = [
    tf.keras.optimizers.Adam(learning_rate=1e-3),
    tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)
Example #5
import numpy as np
from utils import load_dat

# Collect HTK features from both the train and test lists, then compute
# per-dimension mean / variance statistics in a single pass.
feats = []
with open("/n/work1/feng/data/scripts_4emotion/IEMOCAP_train.csv", "r") as train:
    for line in train:
        x_file, laborg, emotion = line.strip().split("\t")
        feats.append(load_dat(x_file))

with open("/n/work1/feng/data/scripts_4emotion/IEMOCAP_test.csv", "r") as test:
    for line in test:
        x_file, laborg, emotion = line.strip().split("\t")
        feats.append(load_dat(x_file))

temp = np.concatenate(feats, axis=0)

print(temp.shape)
mean = np.mean(temp, axis=0)
var = np.var(temp, axis=0)
np.save("/n/work1/feng/src/htk/mean.npy", mean)
np.save("/n/work1/feng/src/htk/var.npy", var)
print(mean.shape)
print(var.shape)