示例#1
0
import dataop as dop

# Build the index-matrix input for the joined, stemmed audio text and store it.
stem_texts = dop.load_var("audio_stem_join")

# The stored dictionary object is indexable; only its first element is handed
# to create_indexMatrix.
stem_lookup = dop.load_var("dicts/audio_stem_all_dict")[0]

# 34910
# NOTE(review): the bare number above comes from the original script and its
# meaning is not shown here (possibly a dictionary size) -- the call below
# uses 5000 as its third argument.
stem_matrix = dop.create_indexMatrix(stem_texts, stem_lookup, 5000)
dop.save_var(stem_matrix, "inputs/audio_stem")

# Disabled variant of the same steps for the "audio_join" data, kept as in the
# original script:
#a = dop.load_var("audio_join")

#d = dop.load_var("dicts/audio_alternatives_dict")
#d = d[0]

#inp = dop.create_indexMatrix(a,d,5)
#dop.save_var(inp,"inputs/audio")
from gtrain import gtrain
from data_model_CNN import Sentence_CNN
from data_model_CNN import DataForCNN
import dataop as dop

# Train the sentence CNN on the previously saved audio-stem train/test split.

# Inputs and their label arrays, as written by the split script
# ("..._tr" / "..._tst" with matching "..._l" label files).
tr = dop.load_var("inputs/audio_stem_tr")
tr_l = dop.load_var("inputs/audio_stem_tr_l")
tst = dop.load_var("inputs/audio_stem_tst")
tst_l = dop.load_var("inputs/audio_stem_tst_l")

data = DataForCNN(tr, tr_l, tst, tst_l)

# NOTE(review): the meaning of (10, 5000) is defined by Sentence_CNN, which is
# not visible here -- presumably class count and vocabulary size; confirm.
model = Sentence_CNN(10, 5000)

# Raw string: "\C" inside a plain literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). The runtime value is unchanged: runs\CNN.
gtrain(model,
       data,
       out_dir=r"runs\CNN",
       evaluate_every=1000,
       checkpoint_every=1000,
       num_epochs=10000)
示例#3
0
import dataop as dop
import re

# Join each slide's transcript alternatives into one string, remove every
# digit character, and save the resulting list as "audio_join".
audio = dop.load_var("audio_alternatives")

# Raw string: "\d" in a plain literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). Compiled once, outside the loop.
digit_pattern = re.compile(r"\d")

# Every alternative is prefixed with a single space, so each joined string
# keeps the leading space the original concatenation loop produced.
new = [
    digit_pattern.sub("", "".join(" " + alt for alt in slide))
    for slide in audio
]

dop.save_var(new, "audio_join")


示例#4
0
import dataop as dop

# For each corpus: concatenate the alternatives of every slide into a single
# string (each alternative preceded by one space), clean it with
# dop.edit_text, clean the resulting list once more as a whole, and save it.
for source_name, target_name in (
    ("audio_alternatives", "audio_join"),
    ("audio_stem_all", "audio_stem_join"),
):
    slides = dop.load_var(source_name)
    joined = [
        dop.edit_text("".join(" " + alt for alt in slide))
        for slide in slides
    ]
    # Second pass over the whole list, exactly as the original script did.
    joined = dop.edit_text(joined)
    dop.save_var(joined, target_name)
示例#5
0
import dataop as dop
import numpy as np

# Build the normalised TF-IDF input matrix for the stemmed audio text.
d = dop.load_var("dicts/audio_stem_all_dict")

# Values of the third element of the dictionary object.
# NOTE(review): presumably per-token occurrence counts -- confirm in dataop.
counts = np.array(list(d[2].values()))

# For every distinct value v, how many entries have a value >= v.
occ2num = {str(u): int(np.sum(counts >= u)) for u in set(counts)}
dop.save_var(occ2num, "occ2num_audio_stem")

# Number of entries with a value of at least 15. Computed directly instead of
# via occ2num[str(15)], which raises KeyError when no entry has the value
# exactly 15; the result is identical whenever that key exists.
min_value = 15
input_size = int(np.sum(counts >= min_value))

I = dop.tfidf(
    dop.create_freq_input(dop.load_var("audio_stem_all"),
                          d[0],
                          size=input_size))
I, _ = dop.normalize_lin(I, [])
# Drop columns that are zero in every row.
I = I[:, np.sum(I != 0, 0) > 0]

dop.save_np(I, "inputs/audio_stem_tfidf_norm")

#---ocr---

# Same cumulative-count table as above, for the OCR dictionary.
# The names d / a / occ2num are kept because the statements that follow this
# block continue to use them (d[0] is read by the next call).
d = dop.load_var("dicts/ocr_3_dict")
a = np.array(list(d[2].values()))

# str(value) -> number of entries whose value is >= that value.
occ2num = {str(u): int(np.sum(a >= u)) for u in set(a)}
dop.save_var(occ2num, "occ2num_ocr")

I = dop.tfidf(
    dop.create_freq_input(dop.load_var("audio_stem_all"),
                          d[0],
示例#6
0
import dataop as dop
import numpy as np
from math import floor

# Shuffle the audio-stem inputs together with one-hot labels and write a
# test split (first tenth after shuffling) and a training split (the rest).
features = np.array(dop.load_var("inputs/audio_stem"))
labels = np.array(dop.load_var("label"))
n_samples = len(labels)
n_classes = max(labels)
n_test = n_samples // 10

# One-hot encoding: label k sets column k - 1 of its row.
one_hot = np.zeros([n_samples, n_classes])
one_hot[np.arange(n_samples), labels - 1] = 1

# One permutation is applied to both arrays so rows stay aligned.
order = np.random.permutation(len(features))
features = features[order]
one_hot = one_hot[order]

# The permutation itself is saved too, so the shuffle can be reproduced.
dop.save_var(order.tolist(), "rp")
for rows, target_name in (
    (features[:n_test], "inputs/audio_stem_tst"),
    (one_hot[:n_test], "inputs/audio_stem_tst_l"),
    (features[n_test:], "inputs/audio_stem_tr"),
    (one_hot[n_test:], "inputs/audio_stem_tr_l"),
):
    dop.save_var(rows.tolist(), target_name)

示例#7
0
import dataop as dop

def _build_index_input(text_var, dict_var, out_var, *args, **kwargs):
    """Load a text variable, clean it, index it and save the result.

    Steps, in order: load ``text_var`` and pass it through ``dop.edit_text``;
    load ``dict_var`` and take its first element; call
    ``dop.create_indexMatrix(texts, lookup, *args, **kwargs)``; save the
    returned value under ``out_var``.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``dop.create_indexMatrix``.
    """
    texts = dop.edit_text(dop.load_var(text_var))
    lookup = dop.load_var(dict_var)[0]
    matrix = dop.create_indexMatrix(texts, lookup, *args, **kwargs)
    dop.save_var(matrix, out_var)


#--audio_stem--
# NOTE(review): the meaning of the extra positional argument (50000) is
# defined by create_indexMatrix, which is not visible here -- confirm.
_build_index_input("audio_stem_all", "dicts/audio_stem_all_dict",
                   "inputs/audio_stem", 50000)

#--ocr--
_build_index_input("ocr", "dicts/ocr_1_dict",
                   "inputs/ocr_char", char_level=True)

#--audio_all--
_build_index_input("audio_join", "dicts/audio_join_dict",
                   "inputs/audio_all")