def getTestAccuracyData(self):
        """Build held-out test feature sets from essays 80-89.

        Returns a two-element list ``[ArgumentTesting_set, LinksTesting_set]``,
        each a lazy feature set produced by ``nltk.classify.apply_features``.
        """
        classification = Classification.Classification()

        for i in range(80, 90):
            doc = reader.documents["essay" + str(i)]
            dataObjectList = []
            # set() de-duplicates the document's annotations before extraction
            annotatedData = set(doc.annotations)
            for annotation in annotatedData:

                dataObject = {"annotation": annotation.repr,
                              "labels": annotation.labels.items(),
                              "links": annotation.links}

                dataObjectList.append(dataObject)

            # NOTE(review): `data` is reassigned on every iteration of the outer
            # loop, so only the LAST document's (essay89) features reach
            # prepareTrainingData below -- confirm whether ExtractDataFeatures
            # is meant to accumulate across documents instead.
            data = self.ExtractDataFeatures(dataObjectList, doc.key)

        preTrainingData = classification.prepareTrainingData(data)  # arguments and links
        Arguments = preTrainingData[0]
        Links = preTrainingData[1]

        # Word features are computed per task, then installed on the shared
        # Classification instance before each apply_features call.
        Arg_word_features = classification.getWordFeatures(Arguments)
        Link_word_features = classification.getWordFeatures(Links)

        classification.setWordfeatureSet(Arg_word_features)
        ArgumentTesting_set = nltk.classify.apply_features(classification.extract_features, Arguments)

        classification.setWordfeatureSet(Link_word_features)
        LinksTesting_set = nltk.classify.apply_features(classification.extract_features, Links)

        return [ArgumentTesting_set,LinksTesting_set]
示例#2
0
    def start(self):
        """Run the full ML pipeline for this job, stage by stage.

        Logs the job configuration, then executes every pipeline stage in
        order. Any failure marks the job "Errored" and logs the traceback
        instead of propagating it to the caller.

        Returns:
            bool: True if every stage completed, False otherwise.
        """
        self.jlogger.info("Starting job with job id {}".format(self.job_id))
        self.jlogger.debug("Job Config: {}".format(self.config))
        self.jlogger.debug("Job Other Data: {}".format(self.job_data))

        try:
            rud.ReadUserData(self)
            fg.FeatureGeneration(self, is_train=True)
            pp.Preprocessing(self, is_train=True)
            fs.FeatureSelection(self, is_train=True)
            fe.FeatureExtraction(self, is_train=True)
            clf.Classification(self)
            cv.CrossValidation(self)
            tsg.TestSetGeneration(self)
            tspp.TestSetPreprocessing(self)
            tsprd.TestSetPrediction(self)
            job_success_status = True
        except Exception:
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; catching Exception keeps those able to
            # terminate the process while the traceback is still logged.
            job_success_status = False
            helper.update_running_job_status(self.job_id, "Errored")
            self.jlogger.exception("Exception occurred in ML Job {} ".format(
                self.job_id))

        return job_success_status
示例#3
0
 def calculate_classification(self):
     """Run k-fold cross-validation and collect per-fold results.

     For each fold index, splits the data set, classifies the test
     portion, and records a ClassificationUnit in the Classification
     object that is returned.
     """
     collected = Classification()
     for fold in range(self.number_cross_validation):
         training, testing = self.split_data_set(fold)
         predictions = self.__classify(training, testing)
         collected.add_unit(ClassificationUnit(training, testing, predictions))
     return collected
def getFunction(mlalgorithm, pDict):
    """Return the bound fit-method of Classification for *mlalgorithm*.

    Args:
        mlalgorithm: one of "SVM", "RandomForest", "DecisionTree",
            "LogisticRegression", "KNearstNeighbors".
        pDict: parameter dict forwarded to the Classification constructor.

    Returns:
        The bound method (not yet called) implementing the algorithm.

    Raises:
        KeyError: if *mlalgorithm* is not a supported algorithm name.
    """
    cl = Classification(pDict)

    # Dispatch table; removed the unused `algList` local the original kept
    # alongside it (the dict keys are the single source of truth).
    options = {
        "SVM": cl.fitSVM,
        "RandomForest": cl.fitRandomForest,
        "DecisionTree": cl.fitDecisionTree,
        "LogisticRegression": cl.fitLogisticRegression,
        "KNearstNeighbors": cl.fitKNearNeighbors,
    }

    return options[mlalgorithm]
示例#5
0
 def __init__(self):
     """Create an empty drug record with every field at its default."""
     # Fields with singular defaults.
     self.primary_id = None
     self.classification = Classification()
     # Free-text fields all default to the empty string.
     for text_field in ('name', 'description', 'indication',
                        'pharmacodynamics', 'molecular_weight',
                        'molecular_formula'):
         setattr(self, text_field, '')
     # Collection fields all default to fresh, independent lists.
     for list_field in ('other_ids', 'synonyms', 'international_brands',
                        'categories', 'sequences', 'pathways_drugs',
                        'pathways_enzymes', 'atc_codes'):
         setattr(self, list_field, [])
示例#6
0
import pandas as pd
"""
Pre-processing data
return: a csv file
"""
# Prompts are Vietnamese: "enter input file" / "enter output file".
input_file = input("Nhap file dau vao:")
output_file = input("Nhap file dau ra:")

# NOTE(review): Preprocessing, Classification and train_test_split are not
# imported in this chunk -- presumably provided elsewhere in the file; verify.
data = Preprocessing(output_file)
data.preprocessing(input_file)
"""
Drop label
Separate output file to training set and test set
"""
# Target column is 'RainTomorrow'; every other column is a feature.
data = pd.read_csv(output_file)
X = data.drop(['RainTomorrow'], axis=1)
y = data['RainTomorrow']

# Hold out 20% for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)
"""
Classification
"""
# Fit and evaluate four classifiers on the same train/test split.
clf = Classification(X_train, X_test, y_train, y_test)
clf.logistic_regression()
clf.random_forest()
clf.support_vector_machine()
clf.decisiontree()
示例#7
0
X_train, Y_train, X_validation, Y_validation, X_test, Y_test = loadMNIST(
    '/home/felippe/Área de Trabalho/Felippe/Mestrado/'
    'Machine_Learning/DataBase/Computer_Vision/MNIST/')

# Reduce MNIST to a one-vs-rest problem for a single digit; the leading
# counts (5400 / 540 / 1000) presumably cap examples per split -- confirm
# against separatingClasses' signature.
digit = 0
X_train, Y_train = separatingClasses(5400, digit, X_train, Y_train)
X_validation, Y_validation = separatingClasses(540, digit, X_validation,
                                               Y_validation)
X_test, Y_test = separatingClasses(1000, digit, X_test, Y_test)

lr = logi_reg.LogisticRegression()

#X must be in the form (N_X, M)
# Pixel values are scaled to [0, 1] by dividing by 255.
lr.run((X_train.T) / 255, np.expand_dims(Y_train, axis=1).T)

cl = clc.Classification()
Y_pred = lr.predict((X_validation.T) / 255)

#Finding the best threshold
# Sweep decision thresholds 0.1 .. 0.9 and keep the one maximizing the
# F1 score on the validation set.
threshold = np.linspace(0.1, 0.9, 9)

F1_best = -1
for i in threshold:
    Y_label_pred = cl.prob2Label(Y_pred, i)
    F1 = cl.F1_score(np.expand_dims(Y_validation, axis=1).T, Y_label_pred)
    if (F1 > F1_best):
        best_threshold = i
        F1_best = F1

print('Best Threshold: %f' % best_threshold)
print('F1 Score in the Validation Set: %f' % F1_best)
示例#8
0
import nltk
import Classification
import pickle

from RelationAnalyzer import RelationAnalyzer
from DataProcessing import DataProcess

dataProcess = DataProcess.DataProcess()

classification = Classification.Classification()

# Get Raw data in Processed
processedData = classification.getProcessedData()

# prepare Training data from raw data
# prepareTrainingData returns a pair: [0] argument samples, [1] link samples.
preTrainingData = classification.prepareTrainingData(processedData) #arguments and links
Arguments = preTrainingData[0]
Links = preTrainingData[1]


#get word features from training data
# These module-level feature lists are closed over by the feature
# extractors defined further down the file.
Arg_word_features = classification.getWordFeatures(Arguments)
Link_word_features = classification.getWordFeatures(Links)

def Arg_Extract_features(document):
    """Map each known argument word to whether *document* contains it.

    Keys follow the nltk convention 'contains(<word>)'; values are
    booleans. Membership is tested against the de-duplicated document.
    """
    present = set(document)
    return {'contains(%s)' % word: (word in present)
            for word in Arg_word_features}
def classify(pTestFile):
    """Perform the classification"""
    # Build the classifier for the given test file and run it immediately;
    # the instance is not needed afterwards, so no local is kept.
    Classification(pTestFile).classify()
示例#10
0
 def nextWindow(self):
     """Open the classification screen and close the current window.

     NOTE(review): Classification() here appears to be a Qt UI class whose
     classification() method builds/shows its widgets -- confirm against
     its definition.
     """
     self.window = QMainWindow()
     self.ui = Classification()
     self.ui.classification()
     self.close()