# These snippets assume the surrounding project provides the usual imports:
# e.g. sys, numpy as np, sklearn.metrics (f1_score, confusion_matrix),
# sklearn.preprocessing.LabelBinarizer, plus the project-local Model,
# DataParser and thresholdTuning helpers.
def genAnalysis(modelfile,testfile,outputfile):
    maxParagraphLength = 20
    maxParagraphs = 5
    filterSizes = [1]
    num_filters = 64
    wordEmbeddingDimension = 30
    lrate = float(1e-3)
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,filterSizes,num_filters,wordEmbeddingDimension,lrate)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    labelIDName = open("../labelId-labelName-full.txt").read().strip().split("\n")
    labelIDName = [ [ int(x.split("\t")[0]), x.split("\t")[1].rstrip() ] for x in labelIDName ]
    # print(labelIDName)    

    #making it a dictionary
    labelName = dict(labelIDName)
    # print(labelName[9026])

    f = open(outputfile,"w")
    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        predLabel = [0]*len(temp)

        output = ""
        for itr in range(11):  #keep the top 11 ranked predictions
            predLabel[temp[itr][0]] = 1
            if truePre[i][0][temp[itr][0]] == 1:  #write out names of correctly predicted labels only
                output = output + "," + labelName[temp[itr][0]]
        f.write(str(i) + ","  + output + "\n")
    f.close()
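A minimal usage sketch for the example above, assuming a trained checkpoint and a Wiki10-style sparse test file; the paths below are placeholders, not the project's actual file names:

# hypothetical invocation; adjust paths to your checkpoint / dataset layout
genAnalysis("models/cnn_run1/model.ckpt",
            "../../dataset/wiki10_miml_test.txt",
            "results/correct_top11_labels.csv")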
Example #2
def analyse(modelfile,testfile,outputfile):
    maxParagraphLength = 20
    maxParagraphs = 10
    filterSizes = [2,3,4]
    num_filters = 64
    wordEmbeddingDimension = 100
    lrate = float(0.001)
    poolLength = 2
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,\
                    filterSizes,num_filters,wordEmbeddingDimension,lrate,poolLength)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    labelids = open("../../dataset/sorted_labelid_sans5toplabels.txt","r").read().strip().split("\n")
    labelids = [ int(x) for x in labelids ]

    no_of_partition = 10
    partition_size = labels // no_of_partition  #integer division so the result can index the rank lists
    rank1 = [0]*no_of_partition
    rank3 = [0]*no_of_partition
    rank5 = [0]*no_of_partition

    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        # bucket = position of the label in the frequency-sorted list, integer-divided by the bucket width
        rank1[ labelids.index( temp[0][0] ) // partition_size ] += 1
        rank3[ labelids.index( temp[0][0] ) // partition_size ] += 1
        rank5[ labelids.index( temp[0][0] ) // partition_size ] += 1

        rank3[ labelids.index( temp[1][0] ) // partition_size ] += 1
        rank5[ labelids.index( temp[1][0] ) // partition_size ] += 1
        rank3[ labelids.index( temp[2][0] ) // partition_size ] += 1
        rank5[ labelids.index( temp[2][0] ) // partition_size ] += 1

        rank5[ labelids.index( temp[3][0] ) // partition_size ] += 1
        rank5[ labelids.index( temp[4][0] ) // partition_size ] += 1

    rank1 = [ ( float(x) /testing.totalPages )*100 for x in rank1  ]
    rank3 = [ ( float(x) /( 3 * testing.totalPages) )*100 for x in rank3  ]
    rank5 = [ ( float(x) /( 5 * testing.totalPages) )*100 for x in rank5  ]

    print(rank1)
    print(rank3)
    print(rank5)

    filePtr = open( outputfile , "w")
    for i in rank1:
        filePtr.write( str(i) + "," )
    filePtr.write("\n")

    for i in rank3:
        filePtr.write( str(i) + "," )
    filePtr.write("\n")

    for i in rank5:
        filePtr.write( str(i) + "," )
    filePtr.close()
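The bucketing above can be factored into a small helper; the following is only a sketch that mirrors the inline indexing, under the assumption that sorted_labelid_sans5toplabels.txt lists label ids from most to least frequent. The min() clamp additionally guards the few least-frequent labels whose index would otherwise fall past the last bucket when the label count is not divisible by 10:

# hypothetical helper mirroring the inline indexing used above
def frequency_bucket(label_id, sorted_label_ids, no_of_partition=10):
    partition_size = len(sorted_label_ids) // no_of_partition
    # clamp so the least-frequent tail labels land in the last bucket
    return min(sorted_label_ids.index(label_id) // partition_size,
               no_of_partition - 1)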
Example #3
def analyse(modelfile, testfile, outputfile):
    maxParagraphLength = 20
    maxParagraphs = 10
    filterSizes = [2, 3, 4]
    num_filters = 64
    wordEmbeddingDimension = 100
    lrate = float(0.001)
    poolLength = 2
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,\
                    filterSizes,num_filters,wordEmbeddingDimension,lrate,poolLength)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelids = open("../../dataset/labelid_labelcount_sans5toplabels.txt",
                    "r").read().strip().split("\n")
    labelcounts = [int((x.split("\t"))[1]) for x in labelids]
    print(labelcounts)

    totalNoofDocuments = 19406  # 6137 (test instances ) +
    rangelist = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100
    ]

    no_of_partition = len(rangelist)
    rank1 = [0] * no_of_partition
    rank3 = [0] * no_of_partition
    rank5 = [0] * no_of_partition

    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        temp = sorted(
            temp, key=lambda x: x[1],
            reverse=True)  #sorting based on label probability to get top k

        for ind, count in enumerate(rangelist):
            if labelcounts[temp[0][0]] <= (count * totalNoofDocuments) / 100:
                rank1[ind] += 1
                rank3[ind] += 1
                rank5[ind] += 1

            if labelcounts[temp[1][0]] <= (count * totalNoofDocuments) / 100:
                rank3[ind] += 1
                rank5[ind] += 1

            if labelcounts[temp[2][0]] <= (count * totalNoofDocuments) / 100:
                rank3[ind] += 1
                rank5[ind] += 1

            if labelcounts[temp[3][0]] <= (count * totalNoofDocuments) / 100:
                rank5[ind] += 1

            if labelcounts[temp[4][0]] <= (count * totalNoofDocuments) / 100:
                rank5[ind] += 1

    rank1 = [(float(x) / testing.totalPages) * 100 for x in rank1]
    rank3 = [(float(x) / (3 * testing.totalPages)) * 100 for x in rank3]
    rank5 = [(float(x) / (5 * testing.totalPages)) * 100 for x in rank5]

    print(rank1)
    print(rank3)
    print(rank5)

    filePtr = open(outputfile, "w")
    for i in rank1:
        filePtr.write(str(i) + ",")
    filePtr.write("\n")

    for i in rank3:
        filePtr.write(str(i) + ",")
    filePtr.write("\n")

    for i in rank5:
        filePtr.write(str(i) + ",")
    filePtr.close()
Example #4
paragraphLength = int(sys.argv[1])
maxParagraphs = int(sys.argv[2] )
filterSizes = [int(i) for i in sys.argv[3].split("-")]
print(filterSizes)
num_filters = int(sys.argv[4])
wordEmbeddingDimension = int(sys.argv[5])
batchSize= int(sys.argv[6])
epochEnd = int(sys.argv[7])
folder_name = sys.argv[8]
nlabels = 8
vocabularySize = 244

training = DataParser(maxParagraphs,paragraphLength,nlabels,vocabularySize)
training.getDataFromfile("../../Reuter_dataset/reuters_sparse_training.txt")
model = Model(maxParagraphs,paragraphLength,nlabels,vocabularySize,filterSizes,num_filters,wordEmbeddingDimension)

costfile = open("results/costfile.txt","a")
output = folder_name

epoch=0
# epochEnd=400
costepochs = []

for e in range(epoch,epochEnd):
    
    cost=0

    for itr in range(int(training.totalPages/batchSize)):
        cost += model.train(training.nextBatch(batchSize))
Example #5
def ComputeFscore(modelfile, testfile, outputfile):
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    # batchSize= int(sys.argv[6])
    # epochs= int(sys.argv[7])
    # folder_name = sys.argv[8]

    labels = 8
    vocabularySize = 244

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, wordEmbeddingDimension)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    labelsCount = {}
    ConfusionMa = {}
    fScr = {}

    thres = 0.5
    valid = int(
        len(truePre) * 0.5
    )  #using first 50% data for threshold tuning - we have merged test and cv files
    labelsCount = {}
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(labels):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])

        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p > bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "a")
    output = sys.argv[9]

    sum_fscore = 0.0
    for i in range(labels):
        sum_fscore = sum_fscore + fScr[i]
        output = output + " , " + str(fScr[i])
    output += " , " + str(sum_fscore / float(labels - 1))
    print("Fscore at " + sys.argv[7] + " epochs: " +
          str(sum_fscore / float(labels - 1)))
    f.write(output + "\n")
    f.close()
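ComputeFscore above (and the later genAnalysis example) relies on a thresholdTuning helper that is not shown in these snippets. The following is only a minimal sketch of what such a helper might look like — sweep each observed probability as a candidate threshold for the label and keep the one maximizing F1 — not the project's actual implementation:

# hedged sketch of the assumed thresholdTuning helper
import numpy as np
from sklearn.metrics import f1_score

def thresholdTuning(t, p):
    t = np.array(t)
    p = np.array(p)
    bestF, bestThre = 0.0, 0.5
    for thre in np.unique(p):
        score = f1_score(t, p >= thre)
        if score > bestF:
            bestF, bestThre = score, thre
    return bestF, bestThre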
Example #6
def ComputePrecisionK(modelfile, testfile, outputfile):
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    lrate = float(sys.argv[10])
    poolLength = int(sys.argv[11])

    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, wordEmbeddingDimension, lrate,
                  poolLength)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    print("Computing Prec@k")

    #check if batchsize needs to be taken by parameter

    batchSize = 1
    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # NDCG@k is evaluated after the helper functions below are defined
    # (calling ndcg_score at this point would raise a NameError).

    def dcg_score(y_true, y_score, k=5):
        """Discounted cumulative gain (DCG) at rank K.

        Parameters
        ----------
        y_true : array, shape = [n_samples]
            Ground truth (true relevance labels).
        y_score : array, shape = [n_samples, n_classes]
            Predicted scores.
        k : int
            Rank.

        Returns
        -------
        score : float
        """
        order = np.argsort(y_score)[::-1]
        y_true = np.take(y_true, order[:k])

        gain = 2**y_true - 1

        discounts = np.log2(np.arange(len(y_true)) + 2)
        return np.sum(gain / discounts)

    def ndcg_score(ground_truth, predictions, k=5):
        """Normalized discounted cumulative gain (NDCG) at rank K.

        Normalized Discounted Cumulative Gain (NDCG) measures the performance of a
        recommendation system based on the graded relevance of the recommended
        entities. It varies from 0.0 to 1.0, with 1.0 representing the ideal
        ranking of the entities.

        Parameters
        ----------
        ground_truth : array, shape = [n_samples]
            Ground truth (true labels represented as integers).
        predictions : array, shape = [n_samples, n_classes]
            Predicted probabilities.
        k : int
            Rank.

        Returns
        -------
        score : float

        Example
        -------
        >>> ground_truth = [1, 0, 2]
        >>> predictions = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
        >>> score = ndcg_score(ground_truth, predictions, k=2)
        1.0
        >>> predictions = [[0.9, 0.5, 0.8], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
        >>> score = ndcg_score(ground_truth, predictions, k=2)
        0.6666666666
        """
        lb = LabelBinarizer()
        lb.fit(range(len(predictions) + 1))
        T = lb.transform(ground_truth)

        scores = []

        # Iterate over each y_true and compute the DCG score
        for y_true, y_score in zip(T, predictions):
            actual = dcg_score(y_true, y_score, k)
            best = dcg_score(y_true, y_true, k)
            score = float(actual) / float(best)
            scores.append(score)

        return np.mean(scores)

    #now that dcg_score and ndcg_score are defined, compute NDCG@k
    for i in [1, 3, 5]:
        val = ndcg_score(truePre, pred, i)
        print(val)
Example #7
def ComputePrecisionK(modelfile, testfile, outputfile):
    maxParagraphLength = int(sys.argv[1])
    maxParagraphs = int(sys.argv[2])
    filterSizes = [int(i) for i in sys.argv[3].split("-")]
    num_filters = int(sys.argv[4])
    wordEmbeddingDimension = int(sys.argv[5])
    lrate = float(sys.argv[10])
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, wordEmbeddingDimension, lrate)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    print("Computing Prec@k")

    #check if batchsize needs to be taken by parameter

    batchSize = 1
    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    K_list = [1, 3, 5, 10, 15]  #prec@1 .....prec@NoofLabels
    precAtK = [0.0] * 16

    # #As need to get Prec only on last 50% of test data as first 50% is for cross validation
    # valid=int(len(truePre)*0.5)
    # pred = pred[valid:]
    # truePre = truePre[valid:]

    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        temp = sorted(
            temp, key=lambda x: x[1],
            reverse=True)  #sorting based on label probability to get top k
        for ele in K_list:  #1....No of Labels
            pBag = 0  #no of true positive for this instance
            for itr in range(ele):  #top k ie top ele
                if truePre[i][0][temp[itr][0]] == 1:
                    precAtK[ele] += 1
                # pBag += 1
            # precAtK[ele] += float(pBag)/float(ele)

    f = open(outputfile, "a")
    output = sys.argv[9]

    for k in K_list:
        precAtK[k] /= (k * len(pred))
        print("Prec@" + str(k) + " = " + str(precAtK[k]))
        output = output + "," + "Prec@" + str(k) + "=," + str(precAtK[k])

    # for key,val in enumerate(precAtK):
    #     if key in K_list :	#As need Prec@k only on Klist ie 1,3,5,10
    #         # print( val )
    #         # print( val/len(pred) )
    #         print( "prec@" + key + "=" + str(val/(key*len(pred))) )
    #         # f.write(str(key)+","+ str(val/len(pred))+"\n")
    #         # f.write(str(key)+","+ str(val/(key*len(pred))+"\n")
    #         output = output + ", prec@" + key + "," + str(val/(key*len(pred)))
    f.write(output + "\n")
    f.close()
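A quick sanity check on the normalization precAtK[k] /= (k * len(pred)) above: if, hypothetically, exactly 2 of the top 5 ranked labels were relevant for every test document, the counter would accumulate 2 * len(pred) hits and Prec@5 would come out as 2/5. Illustrative arithmetic only, with made-up numbers:

# made-up numbers, not real results
n_docs = 100                 # stands in for len(pred)
hits_in_top5 = 2 * n_docs    # 2 correct labels in the top 5 for every document
prec_at_5 = hits_in_top5 / float(5 * n_docs)
print(prec_at_5)             # 0.4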
Example #8
def ComputePrecisionK(modelfile, testfile, param, ep, outputfile):

    param = param.split("_")

    maxParagraphLength = int(param[1])
    maxParagraphs = int(param[2])
    filterSizes = [int(x) for x in param[3].split("-")]

    num_filters = int(param[4])
    wordEmbeddingDimension = int(param[5])
    lrate = float(param[8])
    poolLength = int(param[9])
    labels = 30938
    vocabularySize = 101939

    # print(filterSizes)
    # print(maxParagraphLength)
    # print(maxParagraphs)
    # print(lrate)

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, wordEmbeddingDimension, lrate,
                  poolLength)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    print("Computing Prec@k")

    #check if batchsize needs to be taken by parameter

    batchSize = 1
    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    K_list = [1, 3, 5]  #prec@1 .....prec@NoofLabels
    precAtK = [0.0] * 6

    # #As need to get Prec only on last 50% of test data as first 50% is for cross validation
    # valid=int(len(truePre)*0.5)
    # pred = pred[valid:]
    # truePre = truePre[valid:]

    for i, v in enumerate(pred):
        temp = [(labId, labProb) for labId, labProb in enumerate(v)]
        temp = sorted(
            temp, key=lambda x: x[1],
            reverse=True)  #sorting based on label probability to get top k
        for ele in K_list:  #1....No of Labels
            pBag = 0  #no of true positive for this instance
            for itr in range(ele):  #top k ie top ele
                if truePre[i][0][temp[itr][0]] == 1:
                    precAtK[ele] += 1
                # pBag += 1
            # precAtK[ele] += float(pBag)/float(ele)

    f = open(outputfile, "a")
    # NOTE: "m" is not defined in this snippet; it presumably holds the model/run folder name
    output = str(m) + "/cnn_dynamicmaxpool_" + str(ep)

    for k in K_list:
        precAtK[k] /= (k * len(pred))
        print("Prec@" + str(k) + " = " + str(precAtK[k]))
        # output = output + "," + "Prec@" + str(k) + "=," + str(precAtK[k])

    f.write(output + "\n")
    f.close()
Example #9
def genAnalysis(modelfile, testfile, confusionFile):
    maxParagraphLength = 20
    maxParagraphs = 5
    filterSizes = [1]
    num_filters = 64
    wordEmbeddingDimension = 30
    lrate = float(1e-3)
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs, maxParagraphLength, labels, vocabularySize,
                  filterSizes, num_filters, wordEmbeddingDimension, lrate)

    testing = DataParser(maxParagraphs, maxParagraphLength, labels,
                         vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch(1)
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    valid = int(
        len(truePre) * 0.5
    )  #using first 50% of the data for threshold tuning - we have merged test and cv files
    thresLab = {}
    for la in range(labels):
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][0][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)
        thresLab[la] = bestThre

    print(thresLab)

    labelIDName = open("../labelId-labelName-full.txt").read().strip().split("\n")
    labelIDName = [[int(x.split("\t")[0]),
                    x.split("\t")[1].rstrip()] for x in labelIDName]
    # print(labelIDName)

    #making it a dictionary
    labelname = dict(labelIDName)
    # print(labelName[9026])

    f = open(confusionFile, "w")
    for itr in range(valid,
                     testing.totalPages):  #analysis on the remaining 50%
        predLabel = [pred[itr][i] > thresLab[i] for i in range(labels)]
        output = ""
        for i in range(labels):
            if predLabel[i] == 1:
                output = output + "," + labelname[i]

        tn, fp, fn, tp = confusion_matrix(truePre[itr][0], predLabel).ravel()
        f.write(
            str(itr) + "," + str(tn) + "," + str(fp) + "," + str(fn) + "," +
            str(tp) + "," + output + "\n")
    f.close()
Example #10
def ComputePrecisionK(modelfile,testfile):
    maxParagraphLength = 20
    maxParagraphs = 10
    filterSizes = [2,3]
    num_filters = 16
    wordEmbeddingDimension = 50
    lrate = float(0.001)
    poolLength = 5
    labels = 30938
    vocabularySize = 101939

    model = Model(maxParagraphs,maxParagraphLength,labels,vocabularySize,\
                    filterSizes,num_filters,wordEmbeddingDimension,lrate,poolLength)

    testing = DataParser(maxParagraphs,maxParagraphLength,labels,vocabularySize)
    testing.getDataFromfile(testfile)

    model.load(modelfile)

    print("loading done")
    print("no of test examples: " + str(testing.totalPages))

    batchSize = 1
    testing.restore()
    truePre=[]
    pred=[]
    for itr in range(testing.totalPages):
        data=testing.nextBatch(1)
        truePre.append(data[0])
        pre=model.predict(data)
        pred.append(pre[0])

    labelids = open("../../dataset/sorted_labelid.txt","r").read().strip().split("\n")
    labelids = [ int(x) for x in labelids ]

    no_of_partition = 10
    partition_size = labels // no_of_partition  #integer division so the result can index the prec lists
    prec1 = [0]*no_of_partition
    prec3 = [0]*no_of_partition
    prec5 = [0]*no_of_partition

    for i,v in enumerate(pred):
        temp = [(labId,labProb) for labId,labProb in enumerate(v) ]
        temp = sorted(temp,key=lambda x:x[1],reverse=True)  #sorting based on label probability to get top k
        #finding how many of these were true

        if truePre[i][0][temp[0][0]] == 1:
            prec1[ labelids.index( temp[0][0] ) // partition_size ] += 1
            prec3[ labelids.index( temp[0][0] ) // partition_size ] += 1
            prec5[ labelids.index( temp[0][0] ) // partition_size ] += 1

        if truePre[i][0][temp[1][0]] == 1:
            prec3[ labelids.index( temp[1][0] ) // partition_size ] += 1
            prec5[ labelids.index( temp[1][0] ) // partition_size ] += 1

        if truePre[i][0][temp[2][0]] == 1:
            prec3[ labelids.index( temp[2][0] ) // partition_size ] += 1
            prec5[ labelids.index( temp[2][0] ) // partition_size ] += 1

        if truePre[i][0][temp[3][0]] == 1:
            prec5[ labelids.index( temp[3][0] ) // partition_size ] += 1

        if truePre[i][0][temp[4][0]] == 1:
            prec5[ labelids.index( temp[4][0] ) // partition_size ] += 1

    #raw hit counts per frequency bucket
    print( prec1 )
    print( prec3 )
    print( prec5 )

    prec1 = [ ( float(x) /testing.totalPages )*100 for x in prec1  ]
    prec3 = [ ( float(x) /( 3 * testing.totalPages) )*100 for x in prec3  ]
    prec5 = [ ( float(x) /( 5 * testing.totalPages) )*100 for x in prec5  ]

    #contribution of each bucket to P@1, P@3 and P@5, in percent
    print( prec1 )
    print( prec3 )
    print( prec5 )
Example #11
#continuation of a training script like Example #4: sys.argv[1..7] (paragraphLength,
#maxParagraphs, filterSizes, num_filters, wordEmbeddingDimension, batchSize, epochEnd)
#are assumed to have been parsed above
folder_name = sys.argv[8]
lrate = float(sys.argv[9])
poolLength = int(sys.argv[10])

nlabels = 30938
# nlabels = 968 for parabel cluster 16
vocabularySize = 101939

training = DataParser(maxParagraphs, paragraphLength, nlabels, vocabularySize)
# training.getDataFromfile("../../dataset/minusTop5Labels/wiki10_minusTop5labels_train.txt")
# training.getDataFromfile("/home/khushboo/wiki10/dataset/original_split/wiki10_train_cluster_16.txt")
training.getDataFromfile(
    "/home/khushboo/wiki10/dataset/original_split/wiki10_miml_train.txt")
model = Model(maxParagraphs, paragraphLength, nlabels, vocabularySize,
              filterSizes, num_filters, wordEmbeddingDimension, lrate,
              poolLength)

costfile = open("results/costfile.txt", "a")
output = folder_name

epoch = 0
# epochEnd=400
costepochs = []

for e in range(epoch, epochEnd):

    cost = 0

    for itr in range(int(training.totalPages / batchSize)):
        cost += model.train(training.nextBatch(batchSize))
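
    # The original snippet ends inside the epoch loop. What follows is only a
    # hedged sketch of the per-epoch bookkeeping that the variables defined
    # above (costepochs, costfile, output) suggest, not the project's actual code:
    costepochs.append(cost)
    costfile.write(output + "," + str(e) + "," + str(cost) + "\n")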