def t2():
    # Read the CRF input file: one token per line, blank lines separate utterances.
    f = open('crf-input-data')
    clines = f.readlines()
    f.close()
    u2 = list()    # token sequences, one list per utterance
    utt = list()
    t2 = list()    # tag sequences, one list per utterance
    tutt = list()
    for cl in clines:
        parts = cl.strip()
        if parts == '':
            if utt != []:
                u2.append(utt)
                t2.append(tutt)
                utt = list()
                tutt = list()
        else:
            parts = parts.split()
            utt.append(parts[0])
            tutt.append(parts[2])
    if utt != []:
        u2.append(utt)
        t2.append(tutt)
        utt = list()
        tutt = list()
    # Compare the entities recovered from the CRF data against the annotated slots.
    lines = NLU.getALines()
    utterances = NLU.getUtterances(lines)
    for u in range(0, len(utterances)):
        slots = NLU.getSlots(utterances[u])
        sclist = list()
        for slot in slots[0]:
            sclist.append([slot[1], slot[2]])
        entlist = NLU.getEntities(u2[u], t2[u])[0]
        l1 = list()
        l2 = sclist
        for ent in entlist:
            l1.append([ent[1], ent[2]])
        if l1 != l2:
            print(str(l1) + '_' + str(l2))

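# A minimal sketch (not part of the original script) of the 'crf-input-data' layout that
# t2() assumes: whitespace-separated columns with the token in column 0 and the predicted
# tag in column 2, and a blank line between utterances. The sample rows and the helper
# below are illustrative only.
def _parse_crf_blocks(text):
    tokens, tags, block_toks, block_tags = [], [], [], []
    for line in text.splitlines():
        line = line.strip()
        if line == '':
            if block_toks:
                tokens.append(block_toks)
                tags.append(block_tags)
                block_toks, block_tags = [], []
        else:
            cols = line.split()
            block_toks.append(cols[0])
            block_tags.append(cols[2])
    if block_toks:
        tokens.append(block_toks)
        tags.append(block_tags)
    return tokens, tags

# Example (hypothetical rows):
#   I       PRP  O
#   liked   VBD  O
#   482     CD   B-CLASS
#
#   Stout   NNP  B-INSTRUCTOR
# -> ([['I', 'liked', '482'], ['Stout']], [['O', 'O', 'B-CLASS'], ['B-INSTRUCTOR']])
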
def instructorLevel():
    # Baseline instructor extraction: greedily chain consecutive tokens that appear in
    # the professor-word gazetteer and compare the guesses against the annotated slots.
    ICOR = 0    # correct guesses
    IGUE = 0    # total guesses
    IACT = 0    # actual annotated instructor slots
    profs = dicts.getProfWords()
    pattern = re.compile(r"[\W_]+")
    print(profs)
    sentences = NLU.getALines()
    utterances = NLU.getUtterances(sentences)
    for u in utterances:
        names = list()
        cname = ""
        slots = NLU.getSlots(u)[1]
        tutt = u[0].strip().lower().split()
        print(slots)
        for tok in tutt:
            ttok = pattern.sub("", tok)
            if ttok in profs:
                if cname != "":
                    cname += " "
                cname += ttok
            else:
                if cname != "":
                    names.append(cname)
                cname = ""
        if cname != "":
            names.append(cname)
        print(names)
        slist = list()
        for slot in slots:
            slist.append(slot[0].lower())
        IACT += len(slots)
        IGUE += len(names)
        for name in names:
            if name in slist:
                ICOR += 1
    print(str(ICOR * 1.0 / IGUE))   # precision of the gazetteer guesses
    print(str(ICOR * 1.0 / IACT))   # recall against the annotated slots
    print(IACT)
    return ICOR, IGUE, IACT

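# A small sketch (assumption, not in the original) that turns the (correct, guessed,
# actual) counts returned by instructorLevel() above (or classLevel() below) into
# precision, recall and F1, instead of the two raw ratios printed there.
def prf(correct, guessed, actual):
    precision = correct * 1.0 / guessed if guessed else 0.0
    recall = correct * 1.0 / actual if actual else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0
    return precision, recall, f1

# Usage: p, r, f = prf(*instructorLevel())
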
def genLikesMap(utterances):
    likes_map.clear()
    for i in s_set:
        likes_map[i] = [list(), list()]
    for i in range(0, len(utterances)):
        slots = NLU.getSlots(utterances[i])
        speaker = s_map[utterances[i][0].strip()]
        if slots[0]:
            likes_map[speaker][0].extend(slots[0])
        if slots[1]:
            likes_map[speaker][1].extend(slots[1])
    # generate dictionary for similar likes for each person
    for q in s_set:
        simlikeq = list()
        for i in s_set:
            if i == q:
                continue
            found = False
            for j in range(0, len(likes_map[i][0])):
                if (("EECS", likes_map[i][0][j][1], likes_map[i][0][j][2]) in likes_map[q][0]
                        or ("", likes_map[i][0][j][1], likes_map[i][0][j][2]) in likes_map[q][0]) \
                        and likes_map[i][0][j][2] != "neutral":
                    #print("similar likes for " + i + " and " + q + ": " + str(likes_map[i][0][j]))
                    simlikeq.append(i)
                    found = True
                    break
            if not found:
                for j in range(0, len(likes_map[i][1])):
                    if (likes_map[i][1][j] in likes_map[q][1]
                            and likes_map[i][1][j][1] != "neutral"):
                        #print("similar likes for " + i + " and " + q + ": " + str(likes_map[i][1][j]))
                        simlikeq.append(i)
                        found = True
                        break
        sim_likes[q] = simlikeq

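# A hedged usage sketch: build the likes/similarity maps from the annotated corpus and
# inspect one speaker. It assumes the module-level structures used by genLikesMap()
# (s_set, s_map, likes_map, sim_likes) have already been initialized elsewhere in this
# module; the speaker id "p01" is purely illustrative.
def _demo_genLikesMap():
    utterances = NLU.getUtterances(NLU.getALines())
    genLikesMap(utterances)
    speaker = "p01"  # hypothetical speaker id
    if speaker in sim_likes:
        print(speaker + " shares non-neutral sentiments with: " + str(sim_likes[speaker]))
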
def classLevel():
    # Baseline class extraction: any numeric token or any EECS-dictionary phrase found
    # in the utterance counts as a guessed class mention.
    CCOR = 0    # correct guesses
    CGUE = 0    # total guesses
    CACT = 0    # actual annotated class slots
    pattern = re.compile(r"[\W_]+")
    w = dicts.getEECSdict()
    ww = list()
    for key in w.keys():
        ww.append(w[key])
    sentences = NLU.getALines()
    utterances = NLU.getUtterances(sentences)
    for u in utterances:
        xmatches = list()
        tutt = u[0].strip().lower()
        slots = NLU.getSlots(u)[0]
        for q in tutt.split():
            qq = pattern.sub("", q)
            if is_number(qq):
                xmatches.append(qq)
        for q in ww:
            if q.lower() in tutt:
                xmatches.append(q.lower())
        slist = list()
        for slot in slots:
            slist.append(slot[1].lower())
        print(slist)
        print(xmatches)
        CACT += len(slots)
        CGUE += len(xmatches)
        for name in xmatches:
            if name in slist:
                CCOR += 1
    print(str(CCOR * 1.0 / CGUE))   # precision of the guesses
    print(str(CCOR * 1.0 / CACT))   # recall against the annotated slots
    print(CACT)
    return CCOR, CGUE, CACT

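# classLevel() above and getMatrix() below rely on an is_number() helper that is not
# defined in this section; a minimal sketch of what it presumably does (the original
# implementation may differ):
def is_number(s):
    try:
        float(s)
        return True
    except ValueError:
        return False
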
def main(): name = "MEGHAN" fi = open("../data/extract_samples/pID_AEU") pid = fi.readlines() fi.close() pidmap = dict() pset = set() for i in range(0, len(pid)): parts = pid[i].split("\t") pset.add(parts[0]) pidmap[parts[1].strip()] = parts[0] fl = open("EECS_annotated_samples_anonymized") lines = fl.readlines() fl.close() utterances = NLU.getUtterances(lines) print(utterances[0]) print("Speaker: " + pidmap[utterances[0][0].strip()]) slots = NLU.getSlots(utterances[0]) print(slots) plikes = dict() for i in pset: plikes[i] = [list(), list()] for i in range(0, len(utterances)): slots = NLU.getSlots(utterances[i]) speaker = pidmap[utterances[i][0].strip()] if slots[0]: plikes[speaker][0].extend(slots[0]) if slots[1]: plikes[speaker][1].extend(slots[1]) print("\n\nGiven that EECS 492 sentiment is neutral...") #print(plikes[name]); wholikes = ("EECS", "492", "neutral") likers = list() for i in pset: if wholikes in plikes[i][0]: likers.append(i) # check instructors in likers ucontains_i = "Quentin Stout" print("\n\nWho likes " + ucontains_i) for i in likers: for j in range(0, len(plikes[i][1])): if plikes[i][1][j][0] == ucontains_i: print(i + ": " + str(plikes[i][1][j])) # check classes in likers ucontains_cd = "EECS" ucontains_cid = "545" print("\n\nWho likes " + ucontains_cd + " " + ucontains_cid) for i in likers: for j in range(0, len(plikes[i][0])): # don't worry about department but if you want to... then use this line # plikes[i][0][j][0] == ucontains_cd and if plikes[i][0][j][1] == ucontains_cid: print(i + ": " + str(plikes[i][0][j])) # find all people with similar sentiments to <name> in the data set print("\n\nSimlikes!") simlikesmap = dict() for q in pset: simlikes = list() for i in pset: if i == q: continue found = False for j in range(0, len(plikes[i][0])): if (("EECS", plikes[i][0][j][1], plikes[i][0][j][2]) in plikes[name][0] or ("", plikes[i][0][j][1], plikes[i][0][j][2] ) in plikes[name][0]) and plikes[i][0][j][2] != "neutral": print("similar likes for " + i + " and " + name + ": " + str(plikes[i][0][j])) simlikes.append(i) found = True break if not found: for j in range(0, len(plikes[i][1])): if plikes[i][1][j] in plikes[name][ 1] and plikes[i][1][j][1] != "neutral": print("similar likes for " + i + " and " + name + ": " + str(plikes[i][1][j])) simlikes.append(i) found = True break simlikesmap[q] = simlikes # calculate % of times where OSCORE will be nonzero times = 0 ttimes = 0 for u in utterances: slots = NLU.getSlots(u) speaker = pidmap[u[0].strip()] for slot in slots[0]: ttimes += 1 oscore = 0 for i in simlikesmap[speaker]: pscore = 0 for j in range(0, len(plikes[i][0])): if slot[1] == plikes[i][0][j][1]: if plikes[i][0][j][2] == "positive": pscore += 1 elif plikes[i][0][j][2] == "negative": pscore -= 1 if pscore > 0: oscore += 1 elif pscore < 0: oscore -= 1 if oscore != 0: times += 1 for slot in slots[1]: ttimes += 1 oscore = 0 for i in simlikesmap[speaker]: pscore = 0 for j in range(0, len(plikes[i][1])): if slot[0] == plikes[i][1][j][0]: if plikes[i][1][j][1] == "positive": pscore += 1 elif plikes[i][1][j][1] == "negative": pscore -= 1 if pscore > 0: oscore += 1 elif pscore < 0: oscore -= 1 if oscore != 0: times += 1 print("Times: " + str(times)) print("Total Times: " + str(ttimes)) print("Percentage: " + str(times * 100.0 / ttimes))
def main(): fi = open("sentimentAnnotations") line1 = fi.readlines() fi.close() fo = open("EECS_annotated_samples_anonymized") line2 = fo.readlines() fo.close() utt1 = NLU.getUtterances(line1) utt2 = NLU.getUtterances(line2) correct = 0 wrong = 0 NEU_NEG = 0 NEU_POS = 0 POS_NEG = 0 SNEU_NEG = set() SNEU_NEG.add("neutral") SNEU_NEG.add("negative") SNEU_POS = set() SNEU_POS.add("neutral") SNEU_POS.add("positive") SPOS_NEG = set() SPOS_NEG.add("negative") SPOS_NEG.add("positive") disagrees = list() inst = 1 insttype = "neutral" for i in range(0, len(utt1)): slots1 = NLU.getSlots(utt1[i]) slots2 = NLU.getSlots(utt2[i]) for j in range(0, len(slots1[0])): if insttype == slots2[0][j][2]: inst += 1 if slots1[0][j][3] == slots2[0][j][3]: correct += 1 else: tset = set() tset.add(slots1[0][j][3]) tset.add(slots2[0][j][3]) disagrees.append(utt1[i]) if slots2[0][j][3] == insttype: if tset == SNEU_NEG: NEU_NEG += 1 elif tset == SNEU_POS: NEU_POS += 1 elif tset == SPOS_NEG: POS_NEG += 1 wrong += 1 for j in range(0, len(slots1[1])): if slots1[1][j][1] == slots2[1][j][1]: correct += 1 else: tset = set() disagrees.append(utt1[i]) tset.add(slots1[1][j][1]) tset.add(slots2[1][j][1]) if slots2[1][j][1] == insttype: if tset == SNEU_NEG: NEU_NEG += 1 elif tset == SNEU_POS: NEU_POS += 1 elif tset == SPOS_NEG: POS_NEG += 1 wrong += 1 print("Agree on " + str(correct)) print("Disagree on " + str(wrong)) print("Percent agreement is " + str(correct * 1.0 / (correct + wrong)) + "%") #print("NEU_NEG: " + str(NEU_NEG*1.0/(correct+wrong))); #print("NEU_POS: " + str(NEU_POS*1.0/(correct+wrong))); #print("POS_NEG: " + str(POS_NEG*1.0/(correct+wrong))); print("NEU_NEG: " + str(NEU_NEG * 1.0 / inst)) print("NEU_POS: " + str(NEU_POS * 1.0 / inst)) print("POS_NEG: " + str(POS_NEG * 1.0 / inst))
def main():
    if not os.path.exists('classifiers'):
        os.makedirs('classifiers')
    allines = NLU.getALines()
    allU = NLU.getUtterances(allines)
    textLines = NLU.getTextLines(allU)
    slots = [NLU.getSlots(i) for i in allU]
    # Flatten the annotated slots into one training target per entity mention and
    # remember which mentions belong to which utterance.
    sents = list()
    targets = list()
    tagset = list()
    sent_to_xtc = dict()
    index = 0
    for i in range(len(slots)):
        tstx = []
        for etype in ENT_TYPES:
            for j in range(len(slots[i][etype])):
                tstx.append(index)
                index += 1
                targets.append(slots[i][etype][j]['sentiment'])
                ttags = [
                    slots[i][etype][j][k] for k in ALL_IDS
                    if k in slots[i][etype][j]
                ]
                tagset.append(ttags)
                sents.append(textLines[i])
        sent_to_xtc[i] = tstx
    cprint('Number of Utterances: ' + str(index))
    cprint('Length of Lines: ' + str(len(sents)))
    cprint('Length of Targets: ' + str(len(targets)))
    # Build the vocabulary.
    cv = set()
    regex = re.compile(r'[^a-zA-Z0-9_\~\- ]+')
    for sent in range(0, len(sents)):
        parts = sents[sent].split(' ')
        for part in range(0, len(parts)):
            thepart = regex.sub('', parts[part])
            # corner case for hyphens
            hps = thepart.split('-')
            if len(hps) > 1:
                for hi in range(0, len(hps)):
                    cv.add(hps[hi].lower())
            # end corner case for hyphens
            thepart = thepart.lower()
            cv.add(thepart)
    cv = list(cv)
    cprint('Vocabulary Size: ' + str(len(cv)))
    # Build one feature row per entity mention.
    xtc = []
    for sent in range(0, len(sents)):
        #print('sentence: ' + str(sent))
        #print('s1: ' + str(sents[sent]))
        #print(sents[sent] + ' - with tagset - ' + str(tagset[sent]))
        #dparse = spwrap.parse(sents[sent])
        #print('DPARSE: ' + dparse)
        # add token boundaries to the sentence
        tokenSent = sents[sent]
        for tag in range(0, len(tagset[sent])):
            tokenSent = tokenSent.replace(tagset[sent][tag],
                                          ' ~~t~~ ' + tagset[sent][tag])
        #print(tokenSent)
        parts = regex.sub('', tokenSent)
        # this handles split and hyphen corner case
        parts = re.split(' |-', parts)
        # remove empty parts from the sentence
        while '' in parts:
            parts.remove('')
        # locate window feature indices
        windowFeatures = []
        done = False
        while not done:
            for part in range(0, len(parts)):
                if '~~t~~' == parts[part]:
                    windowFeatures += [part]
                    parts.remove(parts[part])
                    #print('parts?: ' + str(parts))
                    break
                if part == len(parts) - 1:
                    done = True
        #print('window features: ' + str(windowFeatures))
        #print('parts: ' + str(parts))
        row = []
        # featureMapG = [[0]*300]*4
        featureMap = {}
        Nflag = 0
        for part in range(0, len(parts)):
            #thepart = regex.sub('', parts[part])
            #thepart = thepart.lower()
            thepart = parts[part].lower()
            theid = cv.index(thepart)
            #print(theid)
            #g_vec = glove_features.getGloveWord(glove_dict, parts[part])
            mindist = 999
            for wf in range(0, len(windowFeatures)):
                ##############################################################
                ## This is the distance measure for window linear distance!
                distance = abs(windowFeatures[wf] - part)
                ##############################################################
                ## This is the distance measure for dependency tree distance!
                ## distance = spwrap.treeDistance(parts[windowFeatures[wf]], parts[part], dparse)
                ##############################################################
                if distance < mindist:
                    mindist = distance
            mindist += 1
            sentiz = senti_lexis.lexCounts(thepart)
            #for g_vi in range(0, len(g_vec)):
            #    featureMapG[0][g_vi] += g_vec[g_vi]  # - mindist/10.0
            #    featureMapG[1][g_vi] += g_vec[g_vi]  # - mindist/10.0
            #    featureMapG[2][g_vi] += g_vec[g_vi]  # - mindist/10.0
            #    featureMapG[3][g_vi] += g_vec[g_vi]  # - mindist/10.0
            if theid in featureMap:
                # 1.0 - mindist / 10.0 worked well for the first distance measure...
                # featureMap[theid] += 1.0 / mindist
                featureMap[theid][0] += 1.0 - mindist / 10.0
                featureMap[theid][1] += (1.0 - mindist / 10.0) * sentiz[0]
                featureMap[theid][2] += (1.0 - mindist / 10.0) * sentiz[1]
                featureMap[theid][3] += (1.0 - mindist / 10.0) * sentiz[2]
                if Nflag > 0:
                    featureMap[theid][4] = 1.0
            else:
                # featureMap[theid] = 1.0 / mindist
                # count, positive, negative, neutral, negate
                featureMap[theid] = [0, 0, 0, 0, 0]
                featureMap[theid][0] = 1.0 - mindist / 10.0
                featureMap[theid][1] = (1.0 - mindist / 10.0) * sentiz[0]
                featureMap[theid][2] = (1.0 - mindist / 10.0) * sentiz[1]
                featureMap[theid][3] = (1.0 - mindist / 10.0) * sentiz[2]
                if Nflag > 0:
                    featureMap[theid][4] = 1.0
            if Nflag > 0:
                Nflag -= 1
            if senti_lexis.lexNegate(thepart):
                Nflag = 2
        # expand the sparse feature map into a dense row: five values per vocabulary entry
        for i in range(0, len(cv)):
            if i in featureMap:
                row.extend(featureMap[i])
            else:
                row.extend([0, 0, 0, 0, 0])
        # add on the glove features
        # for a in range(0, len(featureMapG)):
        #     temp_vec = []
        #     for a_a in range(0, len(featureMapG[a])):
        #         temp_vec.append(featureMapG[a][a_a]*1.0/len(parts))
        #     row.extend(temp_vec)
        xtc.append(row)
    # instead read the data from splits file
    fsplits = open('splits')
    lines = fsplits.readlines()
    splits = list()
    for i in range(0, len(lines)):
        parts = lines[i].strip().split(':')
        train = list()
        test = list()
        for s in parts[0][1:-1].split(', '):
            train.append(int(s))
        for s in parts[1][1:-1].split(', '):
            test.append(int(s))
        splits.append((train, test))
    fsplits.close()
    # test print the first split
    #print(splits[0][0])
    #print(splits[0][1])
    # do gridsearch + evaluation
    fscores = open('scores_sentiment', 'w')
    bestsplit = -1
    BSscore = 0
    for i in range(0, len(splits)):
        bestC = 0
        bestGamma = 0
        bestScore = 0
        xtest = list()
        xtrain = list()
        ytest = list()
        ytrain = list()
        # add the utterance set generation here for senti_set
        # senti_utters = list()
        # for j in range(0, len(splits[i][0])):
        #     senti_utters.append(utterances[splits[i][0][j]])
        # likesMatrix, slist = leastSquares.getMatrix(senti_utters)
        # do train-test split
        for j in range(0, len(splits[i][0])):
            #speaker = senti_set.getSpeaker(utterances[splits[i][0][j]][0])
            #cossim = leastSquares.consineUser(likesMatrix, slist.index(speaker))
            #print('\n' + speaker + ': ' + utterances[splits[i][0][j]][0].strip())
            # VECTOR is 38 x 141 -> 264 total
            for LL in range(0, len(sent_to_xtc[splits[i][0][j]])):
                #fvector = likesMatrix[slist.index(speaker)]
                #fvector = fvector.tolist()[0]
                fvector = xtc[sent_to_xtc[splits[i][0][j]][LL]]
                #fvector.append(slist.index(speaker))
                ##############################################################
                #entity = tagset[sent_to_xtc[splits[i][0][j]][LL]]
                #entity = tagset2entity(entity)
                #gscore = leastSquares.getGuess(likesMatrix, entity, slist.index(speaker))
                #gscore = leastSquares.getWeightedGuess(cossim, likesMatrix, entity)
                #print('speaker: ' + str(speaker) + ' - ' + str(slist.index(speaker)))
                #fvector.append(gscore)
                ########fvector = [gscore]
                ##############################################################
                xtrain.append(fvector)
                ytrain.append(targets[sent_to_xtc[splits[i][0][j]][LL]])
        for j in range(0, len(splits[i][1])):
            #speaker = senti_set.getSpeaker(utterances[splits[i][1][j]][0])
            #cossim = leastSquares.consineUser(likesMatrix, slist.index(speaker))
            for LL in range(0, len(sent_to_xtc[splits[i][1][j]])):
                #fvector = likesMatrix[slist.index(speaker)]
                #fvector = fvector.tolist()[0]
                fvector = xtc[sent_to_xtc[splits[i][1][j]][LL]]
                #fvector.append(slist.index(speaker))
                ##############################################################
                #entity = tagset[sent_to_xtc[splits[i][1][j]][LL]]
                #entity = tagset2entity(entity)
                #gscore = leastSquares.getGuess(likesMatrix, entity, slist.index(speaker))
                #gscore = leastSquares.getWeightedGuess(cossim, likesMatrix, entity)
                #fvector.append(gscore)
                ########fvector = [gscore]
                ##############################################################
                xtest.append(fvector)
                ytest.append(targets[sent_to_xtc[splits[i][1][j]][LL]])
        # grid search over gamma and C with a simple 2-fold split of the training data
        score = 0
        for gamma in numpy.linspace(0.0001, 0.05, 10):  # 10 steps
            for C in numpy.linspace(0.1, 10, 10):  # 10 steps
                # 2 fold
                x1 = xtrain[len(xtrain) // 2:]
                x2 = xtrain[:len(xtrain) // 2]
                y1 = ytrain[len(ytrain) // 2:]
                y2 = ytrain[:len(ytrain) // 2]
                x11 = csr_matrix(x1)
                x22 = csr_matrix(x2)
                clf = svm.SVC(gamma=gamma, C=C)
                testout = clf.fit(x1, y1)
                score = clf.score(x2, y2)
                clf = svm.SVC(gamma=gamma, C=C)
                testout = clf.fit(x2, y2)
                score += clf.score(x1, y1)
                score /= 2
                if score > bestScore:
                    bestC = C
                    bestGamma = gamma
                    bestScore = score
                cprint('Cross Validation Score: ' + str(score))
                cprint('Gamma = ' + str(gamma) + ' and C = ' + str(C))
        ################ THIS IS FOR CvI EVALUATION ################
        #Ixtest = list()
        #Iytest = list()
        #Cxtest = list()
        #Cytest = list()
        #for j in range(0, len(splits[i][1])):
        #    for LL in range(0, len(sent_to_xtc[splits[i][1][j]])):
        #        fvector = xtc[sent_to_xtc[splits[i][1][j]][LL]]
        #        if coriset[sent_to_xtc[splits[i][1][j]][LL]]:
        #            Cxtest.append(fvector)
        #            Cytest.append(targets[sent_to_xtc[splits[i][1][j]][LL]])
        #        else:
        #            Ixtest.append(fvector)
        #            Iytest.append(targets[sent_to_xtc[splits[i][1][j]][LL]])
        #xtrain = csr_matrix(xtrain)
        #Cxtest = csr_matrix(Cxtest)
        #Ixtest = csr_matrix(Ixtest)
        #clf = svm.SVC(gamma=bestGamma, C=bestC)
        #testout = clf.fit(xtrain, ytrain)
        #CBscore = clf.score(Cxtest, Cytest)
        #IBscore = clf.score(Ixtest, Iytest)
        #cprint('Actual Score: ' + str(CBscore) + ':' + str(IBscore))
        #fscores.write(str(CBscore) + ':' + str(IBscore) + '\n')
        #fscores.flush()
        ###############################################################
        ################ THIS IS FOR NORMAL EVALUATION ################
        xtrain = csr_matrix(xtrain)
        xtest = csr_matrix(xtest)
        clf = svm.SVC(gamma=bestGamma, C=bestC)
        testout = clf.fit(xtrain, ytrain)
        bestScore = clf.score(xtest, ytest)
        cprint('Actual Score: ' + str(bestScore))
        fscores.write(str(bestScore) + '\n')
        ###############################################################
        # save best classifier per fold (binary mode so the pickled bytes round-trip)
        cString = pickle.dumps(clf)
        fsave1 = open('classifiers/sentiment_classifier' + str(i), 'wb')
        fsave1.write(cString)
        fsave1.close()
    fscores.close()
    # save feature dictionary
    cvString = pickle.dumps(cv)
    fsave2 = open('sentiment_dictionary', 'wb')
    fsave2.write(cvString)
    fsave2.close()

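# A hedged sketch (not part of the original) of how the artifacts written above could be
# loaded back for prediction: each fold's classifier is pickled to
# 'classifiers/sentiment_classifier<i>' and the vocabulary list to 'sentiment_dictionary',
# so reloading is a pair of pickle reads. The fold index 0 is just an example.
import pickle

def load_sentiment_model(fold=0):
    with open('classifiers/sentiment_classifier' + str(fold), 'rb') as f:
        clf = pickle.loads(f.read())
    with open('sentiment_dictionary', 'rb') as f:
        cv = pickle.loads(f.read())
    return clf, cv

# Usage: clf, cv = load_sentiment_model(0); then build a feature row with the same
# five-values-per-vocabulary-entry layout used above and call clf.predict([row]).
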
def getMatrix(utterances):
    GROUNDTRUTHS = True
    np.set_printoptions(threshold=np.inf)
    #lines = NLU.getALines()
    # Build the speaker-by-entity sentiment matrix (class columns first, then instructor
    # columns); unknown cells stay at -1, and sentiments map to 0 / 5 / 10.
    ioffset = len(classes)
    X = np.ones((len(sset), len(classes) + len(instructors))) * -1
    #print(X.shape)
    for i in range(0, len(utterances)):
        slots = NLU.getSlots(utterances[i])
        cslots = slots[0]
        islots = slots[1]
        for slot in islots:
            iname = ""
            if GROUNDTRUTHS:
                iname = slot[0]
            else:
                if slot[0] in entcache.keys():
                    iname = entcache[slot[0]]
                else:
                    iname = ed.entityDistance(slot[0])[1][1]
                    entcache[slot[0]] = iname
            if slot[1] == "positive":
                X[slist.index(smap[utterances[i][0].strip()])][ioffset + instructors.index(iname)] = 10
            elif slot[1] == "negative":
                X[slist.index(smap[utterances[i][0].strip()])][ioffset + instructors.index(iname)] = 0
            elif slot[1] == "neutral":
                X[slist.index(smap[utterances[i][0].strip()])][ioffset + instructors.index(iname)] = 5
        for slot in cslots:
            if is_number(slot[1]):
                if slot[1] in classes:
                    if slot[2] == "positive":
                        X[slist.index(smap[utterances[i][0].strip()])][classes.index(slot[1])] = 10
                    elif slot[2] == "negative":
                        X[slist.index(smap[utterances[i][0].strip()])][classes.index(slot[1])] = 0
                    elif slot[2] == "neutral":
                        X[slist.index(smap[utterances[i][0].strip()])][classes.index(slot[1])] = 5
                else:
                    pass  #print(slot[1] + " is not a class...")
            else:
                classname = ""
                if GROUNDTRUTHS:
                    classname = slot[1]
                else:
                    if slot[1] in entcache.keys():
                        classname = entcache[slot[1]]
                    else:
                        classname = ed.entityDistance(slot[1])[0][1]
                        entcache[slot[1]] = classname
                if slot[2] == "positive":
                    X[slist.index(smap[utterances[i][0].strip()])][classNames.index(classname)] = 10
                elif slot[2] == "negative":
                    X[slist.index(smap[utterances[i][0].strip()])][classNames.index(classname)] = 0
                elif slot[2] == "neutral":
                    X[slist.index(smap[utterances[i][0].strip()])][classNames.index(classname)] = 5
    # These four lines apply the low-rank (NMF/ALS) completion; return X, slist instead of
    # newX, slist to use the raw sentiment matrix.
    A, Y = nmf(X, 50)
    A = np.matrix(A)
    Y = np.matrix(Y)
    newX = A * Y
    return newX, slist

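# A hedged illustration (not part of the original) of reading a sentiment guess back out
# of the completed matrix: getMatrix() encodes negative/neutral/positive as 0/5/10, so a
# reconstructed cell can be thresholded back to a label. The speaker/class arguments and
# the 2.5/7.5 thresholds here are assumptions for illustration only.
def guess_class_sentiment(newX, slist, speaker, class_number):
    row = slist.index(speaker)
    col = classes.index(class_number)   # class columns come before instructor columns
    value = newX[row, col]
    if value >= 7.5:
        return "positive"
    elif value <= 2.5:
        return "negative"
    return "neutral"
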