Пример #1
0
def endCluster(folderName, assigned_clusters, vectorsize, clusterType, corpus):
    trace_list = loadXES.get_trace_names(folderName+".xes")
    clusterResult= {}
    for doc_id in range(len(corpus)):
        clusterResult[trace_list[doc_id]]=assigned_clusters[doc_id]


    resultFile= open('output/'+folderName+'T2VVS'+str(vectorsize)+clusterType+'.csv','w')
    for doc_id in range(len(corpus)):
        resultFile.write(trace_list[doc_id]+','+str(assigned_clusters[doc_id])+"\n")

    resultFile.close()
    print("done with " , clusterType , " on event log ", folderName)
Пример #2
0
def cluster(folderName, vectorsize, clusterType):
    corpus = loadXES.get_doc_XES_tagged(folderName + '.xes')
    print('Data Loading finished, ', str(len(corpus)), ' traces found.')

    model = gensim.models.Doc2Vec.load('output/' + folderName + 'T2VVS' +
                                       str(vectorsize) + '.model')

    vectors = []
    NUM_CLUSTERS = 5
    print("inferring vectors")
    for doc_id in range(len(corpus)):
        inferred_vector = model.infer_vector(corpus[doc_id].words)
        vectors.append(inferred_vector)
    print("done")

    if (clusterType == "KMeans"):
        kclusterer = KMeansClusterer(
            NUM_CLUSTERS,
            distance=nltk.cluster.util.cosine_distance,
            repeats=25)
        assigned_clusters = kclusterer.cluster(vectors, assign_clusters=True)
    elif (clusterType == "HierWard"):
        ward = AgglomerativeClustering(n_clusters=NUM_CLUSTERS,
                                       linkage='ward').fit(vectors)
        assigned_clusters = ward.labels_
    elif clusterType == "OCSVM":
        ocsvm = OneClassSVM()
        assigned_clusters = ocsvm.fit_predict(vectors)

    else:
        print(
            clusterType,
            " is not a predefined cluster type. Please use 'KMeans' or 'HierWard', or create a definition for ",
            clusterType)
        return
    trace_list = loadXES.get_trace_names(folderName + ".xes")
    clusterResult = {}
    for doc_id in range(len(corpus)):
        clusterResult[trace_list[doc_id]] = assigned_clusters[doc_id]

    resultFile = open(
        'output/' + folderName + 'T2VVS' + str(vectorsize) + clusterType +
        '.csv', 'w')
    for doc_id in range(len(corpus)):
        resultFile.write(trace_list[doc_id] + ',' +
                         str(assigned_clusters[doc_id]) + "\n")

    resultFile.close()
    print("done with ", clusterType, " on event log ", folderName)