Exemplos de Utilities em Python, exemplos de commons.Utilities em Python

Exemplo n.º 1

0

Exibir arquivo

 def speak(alias):
     """Play the stored voice clip for ``alias``; do nothing if it is missing.

     The clip is looked up as ``<voices path>/<alias>.mp3``.
     """
     voices_root = Utilities.voices_path()
     clip_path = Utilities.absolute_path(voices_root, alias + '.mp3')
     if not os.path.exists(clip_path):
         return

     mixer = pygame.mixer
     mixer.init(44100, -16, 2, 2048)
     mixer.music.load(clip_path)
     mixer.music.play()
     # Pause briefly after starting playback (presumably so the clip can be
     # heard before the caller moves on -- confirm the intended duration).
     time.sleep(1.05)

Exemplo n.º 2

0

Exibir arquivo

 def __init__(self, logFilename=None, utilObj=None):
     """Attach a logging/timing utility to the instance.

     Args:
         logFilename: Log file location handed to a freshly created
             ``Utilities.Utility`` when ``utilObj`` is not supplied.
         utilObj: Existing utility object to reuse; takes precedence over
             ``logFilename``.

     When both arguments are ``None`` this method assigns nothing.
     """
     if utilObj is not None:
         self.util = utilObj
     elif logFilename is not None:
         self.util = Utilities.Utility()
         self.util.setupLogFileLoc(logFilename)
         # The timer is only started for a utility created here; a supplied
         # utilObj is left untouched.
         self.util.startTimeTrack()

Exemplo n.º 3

0

Exibir arquivo

 def __init__(self,
              vectordim=100,
              wordFreqIgnored=2,
              epoches=20,
              noOfWorkers=20,
              learningRate=0.025,
              distributedMem=1,
              logFilename=None,
              utilObj=None):
     """Set up logging and build an untrained ``Doc2Vec`` model.

     Args:
         vectordim: Forwarded to ``Doc2Vec`` as ``size``.
         wordFreqIgnored: Forwarded to ``Doc2Vec`` as ``min_count``.
         epoches: Forwarded to ``Doc2Vec`` as ``iter``.
         noOfWorkers: Forwarded to ``Doc2Vec`` as ``workers``.
         learningRate: Forwarded to ``Doc2Vec`` as ``alpha``.
         distributedMem: Forwarded to ``Doc2Vec`` as ``dm``.
         logFilename: Log file location for a newly created
             ``Utilities.Utility`` when ``utilObj`` is not supplied.
         utilObj: Existing utility object to reuse; takes precedence over
             ``logFilename``.
     """
     if utilObj is not None:
         self.__util = utilObj
     elif logFilename is not None:
         self.__util = Utilities.Utility()
         self.__util.setupLogFileLoc(logFilename)
     # NOTE(review): when neither utilObj nor logFilename is given, the calls
     # below rely on a utility attribute that this method did not set.
     self.__util.logDebug(self.__util.DOC2VECFRAME, 'Initialising doc2vec')
     self.__util.startTimeTrack()
     # NOTE(review): `size` / `iter` look like the keyword names of older
     # Doc2Vec releases -- confirm against the installed library version.
     self.__model = Doc2Vec(size=vectordim,
                            min_count=wordFreqIgnored,
                            iter=epoches,
                            workers=noOfWorkers,
                            alpha=learningRate,
                            dm=distributedMem)
     self.__util.logDebug(
         self.__util.DOC2VECFRAME,
         'Initialising doc2vec completed ' + self.__util.stopTimeTrack())

Exemplo n.º 4

0

Exibir arquivo

 def __init__(self, utilObj=None, logFile=None):
     """Attach a logging utility to the instance.

     Args:
         utilObj: Existing utility object to reuse; takes precedence over
             ``logFile``.
         logFile: Log file location for a newly created
             ``Utilities.Utility`` when ``utilObj`` is not supplied.

     When both arguments are ``None`` this method assigns nothing.
     """
     if utilObj is not None:
         self._util = utilObj
     elif logFile is not None:
         self._util = Utilities.Utility()
         self._util.setupLogFileLoc(logFile)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: End2EndPipelineDemo.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

    def __init__(self,
                 locationOfCVs=None,
                 utilObj=None,
                 logFilename=None,
                 **kwargs):
        """Store the pipeline configuration and prepare an empty results frame.

        Args:
            locationOfCVs: Stored unchanged on ``self.locationOfCVs``.
            utilObj: Existing utility object to reuse; takes precedence over
                ``logFilename``.
            logFilename: Log file location for a newly created
                ``Utilities.Utility`` when ``utilObj`` is not supplied.
            **kwargs: Required keys are ``predictionModelType`` and
                ``saveResultsFilename``. When ``predictionModelType`` is
                ``'topic'``, ``ldaModelFilename`` is also required; for any
                other value ``vectorSpaceModelType``,
                ``vectorSpaceModelFilename`` and
                ``predictionTrainingFilename`` are required.

        Raises:
            KeyError: If a required keyword argument is missing.
        """
        if utilObj is not None:
            self.util = utilObj
        elif logFilename is not None:
            self.util = Utilities.Utility()
            self.util.setupLogFileLoc(logFilename)

        # Structure of the csv to be saved
        self.cvDataframe = pd.DataFrame(
            columns=('filename', 'content', 'vector', 'proba', 'label'))
        self.predictionModelType = kwargs['predictionModelType']

        if self.predictionModelType == 'topic':
            self.ldaModelFilename = kwargs['ldaModelFilename']
        else:
            # 'sim' and every other model type share the same configuration.
            self.vectorSpaceModelType = kwargs['vectorSpaceModelType']
            self.vectorSpaceModelFilename = kwargs['vectorSpaceModelFilename']
            self.predictionTrainingFilename = kwargs[
                'predictionTrainingFilename']

        self.saveResultsFilename = kwargs['saveResultsFilename']
        self.locationOfCVs = locationOfCVs

Exemplo n.º 6

0

Exibir arquivo

 def __init__(self):
     """Initialise the face API wrapper and register its key.

     The key is read from the ``secret`` entry of ``secret.json``, resolved
     through ``Utilities.absolute_path`` relative to this file.
     """
     super(Cognitive, self).__init__()
     self.api = cognitive_face

     secret_file = Utilities.absolute_path(__file__, 'secret.json')
     with open(secret_file) as handle:
         credentials = json.load(handle)
     self.api.Key.set(credentials['secret'])

Exemplo n.º 7

0

Exibir arquivo

    def register(self, data_path):
        """Register every person found in the data folder, then train.

            Args:
                data_path (string): train data folder; the literal value
                    'default' selects ``Utilities.train_data_path()``.

            Nothing happens when the folder does not exist.
        """
        source_dir = (Utilities.train_data_path()
                      if data_path == 'default' else data_path)
        if not Utilities.file_exists(source_dir):
            return

        group = ModelFactory.registered_group()
        group.save()

        # Each entry except the .gitkeep placeholder is registered as one alias.
        aliases = (entry for entry in os.listdir(source_dir)
                   if entry != '.gitkeep')
        for alias_name in aliases:
            logger.log('Registering %s...' % alias_name)
            Person.register(group, alias_name)

        # Training runs once, after all aliases have been registered.
        self.train()

Exemplo n.º 8

0

Exibir arquivo

 def __init__(self, logFilename=None, utilObj=None, vectordim=100, wordFreqIgnored=1,epoches=200, noOfWorkers=10,learningRate=0.025):
     """Set up logging and build an untrained ``Doc2Vec`` model.

     Args:
         logFilename: Log file location for a newly created
             ``Utilities.Utility`` when ``utilObj`` is not supplied.
         utilObj: Existing utility object to reuse; takes precedence over
             ``logFilename``.
         vectordim: Forwarded to ``Doc2Vec`` as ``size``.
         wordFreqIgnored: Forwarded to ``Doc2Vec`` as ``min_count``.
         epoches: Forwarded to ``Doc2Vec`` as ``iter``.
         noOfWorkers: Forwarded to ``Doc2Vec`` as ``workers``.
         learningRate: Forwarded to ``Doc2Vec`` as ``alpha``.
     """
     if utilObj is not None:
         self.util = utilObj
     elif logFilename is not None:
         self.util = Utilities.Utility()
         self.util.setupLogFileLoc(logFilename)
     # Start the timer before building the model so the "completed" message
     # reports the construction time rather than an empty interval.
     self.util.logDebug('D2V', 'Initialising D2V')
     self.util.startTimeTrack()
     self.model = Doc2Vec(size=vectordim,
                          min_count=wordFreqIgnored,
                          iter=epoches,
                          workers=noOfWorkers,
                          alpha=learningRate)
     self.util.logDebug('D2V', 'Initialising D2V completed ' + self.util.stopTimeTrack())

Exemplo n.º 9

0

Exibir arquivo

Arquivo: EnsembleClassifier.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

    def __init__(self, logFile=None, utilObj=None, **kwargs):
        """Attach a logging utility, start its timer and keep the extra options.

        Args:
            logFile: Log file location for a newly created
                ``Utilities.Utility`` when ``utilObj`` is not supplied.
            utilObj: Existing utility object to reuse; takes precedence over
                ``logFile``.
            **kwargs: Stored unchanged on ``self.keywordArgs`` and echoed to
                stdout.
        """
        if utilObj is not None:
            self._util = utilObj
        elif logFile is not None:
            self._util = Utilities.Utility()
            self._util.setupLogFileLoc(logFile)
        # NOTE(review): when neither argument is given, this relies on a
        # `_util` attribute that this method did not set.
        self._util.startTimeTrack()

        self.keywordArgs = kwargs
        print(self.keywordArgs)

Exemplo n.º 10

0

Exibir arquivo

 def __init__(self, logFilename=None, utilObj=None, maxDim=None, ngram=1):
     """Set up logging and build an unfitted ``TfidfVectorizer``.

     Args:
         logFilename: Log file location for a newly created
             ``Utilities.Utility`` when ``utilObj`` is not supplied.
         utilObj: Existing utility object to reuse; takes precedence over
             ``logFilename``.
         maxDim: Forwarded to ``TfidfVectorizer`` as ``max_features``.
         ngram: Upper bound of the n-gram range; the vectorizer is built with
             ``ngram_range=(1, ngram)``.
     """
     if utilObj is not None:
         self.util = utilObj
     elif logFilename is not None:
         self.util = Utilities.Utility()
         self.util.setupLogFileLoc(logFilename)
     # NOTE(review): when neither argument is given, the calls below rely on
     # a `util` attribute that this method did not set.
     self.util.logDebug('TFIDF', 'Initialising TFIDF')
     self.util.startTimeTrack()
     self.model = TfidfVectorizer(max_features=maxDim, ngram_range=(1, ngram))
     self.util.logDebug('TFIDF',
                        'Initialising TFIDF completed ' + self.util.stopTimeTrack())

Exemplo n.º 11

0

Exibir arquivo

Arquivo: face_recognizer.py Projeto: anhhoangiot/people_recognition_pi

    def __init__(self, camera):
        """Load the face cascade and open the camera at 640x480.

                Args:
                        camera (int): index of camera to use
        """
        cascade_path = Utilities.absolute_path(__file__, 'cascade.xml')
        self.cascade_file = cascade_path
        self.face_cascade = cv2.CascadeClassifier(cascade_path)

        capture = cv2.VideoCapture(camera)
        self.video_capture = capture
        capture.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH, 640)
        capture.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT, 480)

        # The capture counter starts at 1; it is used to number saved frames.
        self.faces_captured = 1

Exemplo n.º 12

0

Exibir arquivo

Arquivo: person.py Projeto: anhhoangiot/people_recognition_pi

    def register(group, alias):
        """Register one person in ``group`` and process their data in parallel.

        The person's name is read from ``name.txt`` inside the alias folder
        under the training-data path. After the person is saved, two threads
        (voice creation and face saving) are run and joined before returning.

        Args:
            group: Object providing ``person_with_name(name)``.
            alias: Folder name of the person inside the training-data path.
        """
        data_path = Utilities.absolute_path(Utilities.train_data_path(), alias)

        # Only the read needs the file; close it before the long-running work.
        with open(os.path.join(data_path, 'name.txt'), 'r') as name_file:
            name = name_file.read()

        person = group.person_with_name(name)
        person.alias = alias
        person.save()

        create_voice_thread = threading.Thread(
            name="create_voice",
            target=Person.__create_voice,
            args=(name, alias))
        save_faces_thread = threading.Thread(
            name="save_faces",
            target=Person.__save_faces,
            args=(person, data_path))

        processes = ProcessParallel(create_voice_thread, save_faces_thread)
        processes.fork_threads()
        processes.start_all()
        # Wait until all threads are done
        processes.join_all()

Exemplo n.º 13

0

Exibir arquivo

Arquivo: VectorSimPredictionModel.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

    def __init__(self, logFile=None, utilObj=None):
        """Attach a logging utility to the instance and start its timer.

        :param logFile: Log file location for a newly created
            ``Utilities.Utility`` when ``utilObj`` is not supplied.
        :param utilObj: Existing utility object to reuse; takes precedence
            over ``logFile``.
        """
        if utilObj is not None:
            self._util = utilObj
        elif logFile is not None:
            self._util = Utilities.Utility()
            self._util.setupLogFileLoc(logFile)
        # NOTE(review): when neither argument is given, this relies on a
        # `_util` attribute that this method did not set.
        self._util.startTimeTrack()

Exemplo n.º 14

0

Exibir arquivo

Arquivo: markedForDeleteClassifierLoader.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

    def __init__(self, type=None, utilObj=None, utilName=None):
        """Attach a logging utility and create the requested classifier.

        Args:
            type: ``self.TYPE_SVM`` selects ``SVC()``;
                ``self.TYPE_MULTINORM_NAIVEBAYES`` selects ``MultinomialNB()``.
                Any other value leaves ``self.classifier`` unset.
            utilObj: Existing utility object to reuse; takes precedence over
                ``utilName``.
            utilName: Log file location for a newly created
                ``Utilities.Utility`` when ``utilObj`` is not supplied.
        """
        if utilObj is not None:
            self.util = utilObj
        elif utilName is not None:
            # The new utility must be stored on the instance before it is
            # configured on the next line.
            self.util = Utilities.Utility()
            self.util.setupLogFileLoc(utilName)

        if type == self.TYPE_SVM:
            self.classifier = SVC()
        elif type == self.TYPE_MULTINORM_NAIVEBAYES:
            self.classifier = MultinomialNB()

Exemplo n.º 15

0

Exibir arquivo

Arquivo: MLClassifierPredictionModel.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

    def __init__(self, logFile=None, utilObj=None, classifierType=None, classifierParams=None):
        """Attach a logging utility and instantiate the requested classifier.

        Args:
            logFile: Log file location for a newly created
                ``Utilities.Utility`` when ``utilObj`` is not supplied.
            utilObj: Existing utility object to reuse; takes precedence over
                ``logFile``.
            classifierType: One of the ``TYPE_*`` constants of this class. On
                a match the model is stored on ``self._model`` and the type on
                ``self._type``. An unrecognised value logs an error and
                terminates the process via ``exit(-1)``.
            classifierParams: Accepted but not used by this method.
        """
        if utilObj is not None:
            self._util = utilObj
        elif logFile is not None:
            self._util = Utilities.Utility()
            self._util.setupLogFileLoc(logFile)

        # NOTE(review): when neither argument is given, this relies on a
        # `_util` attribute that this method did not set.
        self._util.startTimeTrack()

        # (name of the TYPE_* attribute, zero-argument model factory), tested
        # in this order. Attribute names and factories are resolved lazily so
        # only the matching entry is ever evaluated.
        builders = (
            ('TYPE_SVM',
             lambda: CalibratedClassifierCV(base_estimator=LinearSVC())),
            ('TYPE_LOG',
             lambda: LogisticRegression(solver='newton-cg', n_jobs=30)),
            ('TYPE_SVC', lambda: SVC(C=5, kernel='linear')),
            ('TYPE_MLP', lambda: MLPClassifier()),
            ('TYPE_NB', lambda: MultinomialNB()),
            ('TYPE_NB_GAUSSIAN', lambda: GaussianNB()),
            ('TYPE_TREE', lambda: DecisionTreeClassifier()),
            ('TYPE_XGBOOST',
             lambda: XGBClassifier(
                 max_depth=3, learning_rate=0.001, n_estimators=100,
                 silent=False, objective='binary:logistic', booster='gbtree',
                 n_jobs=32, nthread=None, gamma=0, min_child_weight=1,
                 max_delta_step=0, subsample=1, colsample_bytree=1,
                 colsample_bylevel=1, reg_alpha=0, reg_lambda=1,
                 scale_pos_weight=1, base_score=0.5, random_state=0,
                 seed=None, missing=None)),
            ('TYPE_SEQ', lambda: KerasWrapper(utilObj=self._util)),
        )
        for typeAttr, build in builders:
            if classifierType == getattr(self, typeAttr):
                self._model = build()
                self._type = classifierType
                return

        if classifierType == self.TYPE_DUMMY:
            # The dummy classifier is the only one whose construction errors
            # are reported on stdout instead of propagating.
            try:
                self._model = DummyClassifier()
                self._type = classifierType
            except Exception as error:
                print(error)
            return

        self._util.logError('MLClassifierPredictionModel','An instance of the classifier needs to be passed to this class...exiting')
        exit(-1)

Exemplo n.º 16

0

Exibir arquivo

 def create_voice(text, alias):
     """Generate ``<voices path>/<alias>.mp3`` from ``text`` if it is missing.

     The text is posted to the text-to-speech endpoint; when the JSON reply
     carries a non-empty ``async`` entry, that URL is downloaded to the
     target path. Any failure is printed and otherwise ignored (best effort).

     Args:
         text: Request body sent to the text-to-speech service.
         alias: Base name (without extension) of the mp3 file to create.
     """
     sounds_dir = Utilities.voices_path()
     save_dir = Utilities.absolute_path(sounds_dir, alias + '.mp3')
     if os.path.exists(save_dir):
         # The clip already exists; nothing to do.
         return

     # NOTE(review): this credential is committed in source; consider loading
     # it from configuration and rotating the key.
     headers = {
         'Content-Type': 'application/json',
         'api_key': 'b5db987b5a944ff78097d435a5a564dc'
     }
     try:
         response = requests.request(
             'POST',
             'http://api.openfpt.vn/text2speech/v3',
             headers=headers,
             data=text
         )
         if response.status_code not in (200, 202):
             print(response.status_code)
         if response.text:
             result = response.json()
             if result['async']:
                 wget.download(result['async'], save_dir)
     except Exception as e:
         # Deliberately best effort: report the problem and carry on.
         print(e)

Exemplo n.º 17

0

Exibir arquivo

    def __init__(self, logFile=None, utilObj=None, ldaModelFilename=None):
        """Attach a logging utility and create the wrapped LDA model.

        :param logFile: Log file location for a newly created
            ``Utilities.Utility`` when ``utilObj`` is not supplied.
        :param utilObj: Existing utility object to reuse; takes precedence
            over ``logFile``.
        :param ldaModelFilename: When given, passed to the model's
            ``loadModel`` right after construction.
        """
        if utilObj is not None:
            self._util = utilObj
        elif logFile is not None:
            self._util = Utilities.Utility()
            self._util.setupLogFileLoc(logFile)

        # NOTE(review): when neither argument is given, this relies on a
        # `_util` attribute that this method did not set.
        self._model = LDA.LDA(utilObj=self._util)
        if ldaModelFilename is not None:
            self._model.loadModel(ldaModelFilename)

        # NOTE(review): no "trained" flag is recorded on the instance here; a
        # bare local assignment would have no effect. Set `self._trained`
        # explicitly if other methods depend on such a flag.

Exemplo n.º 18

0

Exibir arquivo

Arquivo: face_recognizer.py Projeto: anhhoangiot/people_recognition_pi

    def start_capturing(self, is_register):
        """Read camera frames and save those in which a face is detected.

                Frames are read until ``self.faces_captured`` reaches
                ``DEFAULT_FACES`` or the 'q' key is seen. Each saved frame
                is written to ``tmp/face<N>.jpg`` next to this file and
                advances the counter by one. ``stop_capturing`` is always
                called exactly once on the way out, including when an
                error escapes the loop.

                Args:
                        is_register (bool): determine if the session is
                        registering or identifying
                            When exactly ``False`` the counter is set to
                            ``DEFAULT_FACES - 1`` so a single frame is
                            saved; otherwise capture continues from the
                            current counter, pausing 2 seconds after each
                            saved frame.
        """
        if is_register is False:
            self.faces_captured = DEFAULT_FACES - 1
        try:
            while self.faces_captured < DEFAULT_FACES:
                # Read frame from video capture
                # NOTE(review): the success flag is not checked; a failed
                # read surfaces as an error from the conversion below.
                _return_code, frame = self.video_capture.read()
                # Use Haar cascade to detect faces in captured frame
                faces = self.face_cascade.detectMultiScale(
                    cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY),
                    scaleFactor=1.1,
                    minNeighbors=5,
                    minSize=(30, 30),
                    flags=cv2.cv.CV_HAAR_SCALE_IMAGE
                )
                # At most one image is written per frame, however many faces
                # were detected; the whole frame is saved, not a crop.
                if len(faces) > 0:
                    # All captured images will be saved in tmp folder in
                    # core/face_recognizer
                    filePath = Utilities.absolute_path(
                        __file__, 'tmp/face%d.jpg' % self.faces_captured)
                    self.faces_captured += 1
                    cv2.imwrite(filePath, frame)
                    if is_register:
                        # Sleep 2 seconds so user can change face orientation
                        time.sleep(2)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Single release point for every exit path.
            self.stop_capturing()

Exemplo n.º 19

0

Exibir arquivo

Arquivo: ModelPhase_MLCLASSIFIER.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

        appFolderLocation = None
        if (len(sys.argv) == 11):
            listOfAppD2vFoldersOrderByLabelStr = sys.argv[8]
            listOfAppD2vFoldersOrderByLabel = listOfAppD2vFoldersOrderByLabelStr.split(
                ',')
            listOfLabelsOrderByLabelStr = sys.argv[9]
            listOfLabelsOrderByLabel = list(
                map(int, listOfLabelsOrderByLabelStr.split(',')))
            appFolderLocation = sys.argv[10]

            print('listOfAppD2vFoldersOrderByLabelStr:',
                  listOfAppD2vFoldersOrderByLabelStr)
            print('listOfLabelsOrderByLabelStr:', listOfLabelsOrderByLabelStr)
            print('appFolderLocation:', appFolderLocation)

        util = Utilities.Utility()
        util.setupLogFileLoc(logFile=logFile)
        util.setupTokenizationRules(tokenRules)

        predictionModel = MLClassifierPredictionModel.MLClassifierPredictionModel(
            utilObj=util, classifierType=classifierType)
        predictionModel.train(sampleLabelledTrainingFilename)
        predictionModel.loadXYtest(sampleLabelledTestFilename)
        eval = Evaluator.Evaluator(utilObj=util)
        predictionModel.evaluate(approach_vsm, eval)

        if (len(sys.argv) == 11):
            evalHeu = HeuristicEvaluator.HeuristicEvaluator(
                utilObj=util,
                listOfAppD2vFoldersOrderByLabel=listOfAppD2vFoldersOrderByLabel,
                listOfLabelsOrderByLabel=listOfLabelsOrderByLabel,

Exemplo n.º 20

0

Exibir arquivo

Arquivo: Evaluator.py Projeto: webdevoir/16073301_KahSiongTan_AnsAppForms

class Evaluator():
    """Scores predictions, summarises metric files and runs the overlap heuristic.

    All logging and file helpers go through ``self.util``. Instances that are
    created without ``utilObj`` or ``logFile`` fall back to the class-level
    ``util`` object shared by the whole class.
    """

    # Selector values accepted by score(type=...).
    SCORE_ACCURACY='acc'
    SCORE_CONFUSIONMATRIX='confmatrix'
    SCORE_CLASSREPORT='class'
    SCORE_F1='f1'
    SCORE_PRECISION='precision'
    SCORE_RECALL='recall'
    SCORE_F1_PERCLASS='f1_perclass'
    SCORE_PRECISION_PERCLASS='precision_perclass'
    SCORE_RECALL_PERCLASS='recall_perclass'


    __logFile=''
    # Shared default utility, created once when the class is defined.
    util=Utilities.Utility()

    # Column layout of every CSV written by evaluateHeuristic.
    _HEURISTIC_COLUMNS = ['appid', 'clientid', 'cvd2vfilename', 'categories',
                          'scores', 'heuristics', 'heuristics_reason',
                          'content']

    def __init__(self, logFile=None, utilObj=None):
        """Optionally replace the shared utility for this instance.

        :param logFile: Log file location for a newly created
            ``Utilities.Utility`` when ``utilObj`` is not supplied.
        :param utilObj: Existing utility object to reuse; takes precedence
            over ``logFile``.
        """
        if utilObj is not None:
            self.util = utilObj
        elif logFile is not None:
            self.util = Utilities.Utility()
            self.util.setupLogFileLoc(logFile)

    def generateGraphicalConfusionMatrix(self, array):
        """Draw ``array`` as an annotated heatmap on a new 10x7 figure.

        Rows and columns are labelled 'E', 'S', 'P', 'W', so ``array`` must
        be 4x4.

        :param array: Confusion-matrix values.
        :return: The ``matplotlib.pyplot`` module, with the new figure
            current, so the caller can e.g. ``savefig`` it.
        """
        # Imported lazily so the class works without plotting libraries.
        import seaborn as sn
        import matplotlib.pyplot as plt
        labels = list("ESPW")
        df_cm = pd.DataFrame(array, index=labels, columns=labels)
        plt.figure(figsize=(10, 7))
        sn.heatmap(df_cm, annot=True, cmap="YlGnBu")
        return plt

    def generateSummary(self, folderpath):
        """Pivot the ``*.metric`` files of a folder into per-metric summaries.

        Each file name is parsed as ``<approach>_<vsm>_<metric>.<ext>``; its
        content is read through ``self.util.readFileContent`` and rounded to
        two decimals. For every metric one table (rows: VSM, columns:
        approach) is printed and written to ``<folderpath>/<metric>.summary``.

        :param folderpath: Folder containing the ``*.metric`` files.
        """
        from decimal import Decimal
        metricDict = defaultdict(list)
        for metricFile in sorted(glob.iglob(folderpath + '/*.metric')):
            tokens = os.path.basename(metricFile).split('_')
            approach = tokens[0]
            vsm = tokens[1]
            metric = tokens[2].split('.')[0]
            value = str(round(Decimal(self.util.readFileContent(metricFile)), 2))
            metricDict[metric].append((approach, vsm, value))

        for metric, records in metricDict.items():
            # Keep rows/columns in order of first appearance.
            vsmOrder = []
            approachOrder = []
            cells = {}
            for approach, vsm, metricValue in records:
                if vsm not in vsmOrder:
                    vsmOrder.append(vsm)
                if approach not in approachOrder:
                    approachOrder.append(approach)
                cells.setdefault(approach, {})[vsm] = metricValue
            metricResultsDF = pd.DataFrame(
                cells,
                index=pd.Index(vsmOrder, name='VSM'),
                columns=approachOrder)
            print(metricResultsDF)
            metricResultsDF.to_csv(folderpath+'/'+metric+'.summary',header=True,index_label='VSM',index=True,mode='w')

    def printSummary(self):
        """Placeholder; does nothing."""
        pass

    def _savePerClassMetric(self, values, filename, tag):
        """Write one ``<filename><tag><classIndex>.metric`` file per value."""
        for classIndex, value in enumerate(values):
            self.util.saveStringToFile(
                value, filename=filename + tag + str(classIndex) + '.metric')

    def score(self,y=None, ypred=None,type=SCORE_ACCURACY, filename=None, **kwargs):
        """Compute one metric for ``ypred`` against ``y`` and persist it.

        :param y: True labels.
        :param ypred: Predicted labels.
        :param type: One of the ``SCORE_*`` constants; an unknown value
            computes nothing.
        :param filename: Prefix of the output file(s); a metric-specific
            suffix is appended. Required for every type except
            ``SCORE_CLASSREPORT``, which writes nothing.
        :param kwargs: Accepted but unused.
        :return: The computed metric (the confusion matrix is returned as its
            string rendering), or ``None`` for an unknown ``type``.
        """
        results=None
        if type == self.SCORE_ACCURACY:
            results = accuracy_score(y, ypred)
            self.util.saveStringToFile(results, filename=filename+'_ACC.metric')

        elif type == self.SCORE_CONFUSIONMATRIX:
            print('y:',y)
            print('ypred:', ypred)
            results = confusion_matrix(y, ypred)
            print('results:',results)

            # The picture is optional: any failure (missing plotting
            # libraries, no display, ...) is reported and the textual matrix
            # is still saved below.
            try:
                snMatrix = self.generateGraphicalConfusionMatrix(results)
                snMatrix.savefig(filename + '_CONF.png')
            except Exception as error:
                print(error)
                self.util.logError('Evaluator','Graphical version of confusion matrix cannot be generated with xScreen and tkinter support on python')
            results = np.array2string(results)
            self.util.saveStringToFile(results, filename=filename + '_CONF.CONFU')

        elif type == self.SCORE_CLASSREPORT:
            # Returned to the caller only; not written to disk.
            results = classification_report(y, ypred)
        elif type == self.SCORE_F1_PERCLASS:
            results = f1_score(y, ypred, average=None)
            self._savePerClassMetric(results, filename, '_F1-')
        elif type == self.SCORE_PRECISION_PERCLASS:
            results = precision_score(y, ypred, average=None)
            self._savePerClassMetric(results, filename, '_PREC-')
        elif type == self.SCORE_RECALL_PERCLASS:
            results = recall_score(y, ypred, average=None)
            self._savePerClassMetric(results, filename, '_RECALL-')
        elif type == self.SCORE_F1:
            results = f1_score(y, ypred, average='macro')
            self.util.saveStringToFile(results, filename=filename + '_F1.metric')
        elif type == self.SCORE_PRECISION:
            results = precision_score(y, ypred, average='macro')
            self.util.saveStringToFile(results, filename=filename + '_PREC.metric')
        elif type == self.SCORE_RECALL:
            results = recall_score(y, ypred, average='macro')
            self.util.saveStringToFile(results, filename=filename + '_RECALL.metric')

        return results


    def _getFirstString(self,myDelimitedStr=None, delimiter=','):
        """
        Return the first string from a set of delimited strings
        :param myDelimitedStr: Delimited text; a non-string value (e.g. None
            or a number) yields ''.
        :param delimiter: Separator between the items.
        :return: Text before the first delimiter, or '' when splitting fails.
        """
        try:
            return myDelimitedStr.split(delimiter)[0]
        except (AttributeError, TypeError, ValueError):
            return ''

    @staticmethod
    def _readTextFile(path):
        """Return the whole content of ``path``, closing the file afterwards."""
        with open(path, 'r') as handle:
            return handle.read()

    def _flushHeuristicRows(self, rows, heuristicsFilename, filecounter):
        """Append ``rows`` (list of dicts) to the CSV of the current input file.

        The target is ``<heuristicsFilename up to its first '.'>_<NN>.csv``
        where NN is ``filecounter`` zero-padded to two digits. No header or
        index is written.
        """
        outputPath = heuristicsFilename.split('.')[0]+'_'+str(filecounter).zfill(2)+'.csv'
        batch = pd.DataFrame(rows, columns=self._HEURISTIC_COLUMNS)
        batch.to_csv(outputPath, sep=',', mode='a', header=False, index=False,
                     columns=self._HEURISTIC_COLUMNS)

    def evaluateHeuristic(self, resultsDatasetFilename=None, heuristicsFilename=None, appd2vEduFolder=None, appd2vSkillsFolder=None, appd2vPersonalDetailsFolder=None):
        """
        Mark every categorised CV row as HIT or MISS by token overlap.

        Each results file is read without a header; columns are used as
        0=appid, 1=clientid, 2=cvd2v file path, 4=categories, 5=scores.
        For each row:
        - only the first ':'-separated category is considered;
        - the cvd2v file and the matching
          ``<category folder>/app_<clientid>_<appid>.<ext>`` file are
          tokenized with ``self.util.tokenize``;
        - the row is a HIT when the number of shared tokens reaches the
          category threshold held by ``self.util`` (for personal details the
          shared tokens are first reduced with
          ``self.util.returnNonEnglishDictWords``), otherwise a MISS;
        - the shared tokens (or a FILE_NOT_FOUND note) become the reason.
        Rows are appended to one CSV per input file, every 100 rows and once
        more when the file is finished.

        :param resultsDatasetFilename: Names of files seperated by ';'
        :param heuristicsFilename: Name of the file to save results in
        :param appd2vEduFolder: Folder that contain the appd2v files which are named app_xx_yyyy.d2v.
        :param appd2vSkillsFolder: Folder that contain the appd2v files which are named app_xx_yyyy.d2v.
        :param appd2vPersonalDetailsFolder: Folder that contain the appd2v files which are named app_xx_yyyy.d2v.
        :return: None
        """
        logTag = 'Evaluator-evaluateFromDataset'
        util = self.util

        for filecounter, datasetFilename in enumerate(resultsDatasetFilename.split(';')):
            util.logDebug(logTag, 'Reading ' + datasetFilename)
            resultsDF = pd.read_csv(datasetFilename, header=None)
            util.logDebug(logTag, 'Processing...')

            pendingRows = []
            counter = 0
            # Row errors are not trapped, so this stays 0; it is kept because
            # the progress message reports it.
            errcounter = 0

            for index, row in resultsDF.iterrows():
                clientid = str(row[1]).zfill(3)
                appid = row[0]
                cvd2vFullpath = row[2]
                categories = row[4]
                scores = row[5]
                # The heuristics will only take the category with highest score.
                category = self._getFirstString(myDelimitedStr=categories, delimiter=':')

                cvd2vContent = self._readTextFile(cvd2vFullpath)
                cvd2vContentTokens = util.tokenize(cvd2vContent)

                # Based on the matched category, pull the relevant
                # app_xx_yyyy.d2v file.
                appd2vFilename = ''
                if category == util.LOOKUP_CAT_EDU:
                    appd2vFilename = appd2vEduFolder + '/' + 'app_' + str(clientid) + '_' + str(appid) + '.' + util.LOOKUP_EXT_APPD2V
                elif category == util.LOOKUP_CAT_SKILLS:
                    appd2vFilename = appd2vSkillsFolder + '/' + 'app_' + str(clientid) + '_' + str(appid) + '.' + util.LOOKUP_EXT_APPD2V
                elif category == util.LOOKUP_CAT_PERSONALDETAILS:
                    appd2vFilename = appd2vPersonalDetailsFolder + '/' + 'app_' + str(clientid) + '_' + str(appid) + '.' + util.LOOKUP_EXT_APPD2V

                heuristics_reason = ''
                if os.path.exists(appd2vFilename):
                    appd2vContent = self._readTextFile(appd2vFilename)
                else:
                    appd2vContent = ''
                    heuristics_reason = 'FILE_NOT_FOUND_IN_CATEGORY: '+appd2vFilename
                appd2vContentTokens = util.tokenize(appd2vContent)

                identicals = set(appd2vContentTokens) & set(cvd2vContentTokens)
                heuristics = 'MISS'
                if category == util.LOOKUP_CAT_EDU and len(identicals) >= util.THRES_EDU:
                    heuristics = 'HIT'
                    heuristics_reason = util.tokensToStr(identicals)
                elif category == util.LOOKUP_CAT_SKILLS and len(identicals) >= util.THRES_SKILLS:
                    heuristics = 'HIT'
                    heuristics_reason = util.tokensToStr(identicals)
                elif category == util.LOOKUP_CAT_PERSONALDETAILS and len(identicals) >= util.THRES_PERSONALDETAILS:
                    # For personal details, be more restrictive by limiting
                    # to words not in the English dictionary.
                    identicals = util.returnNonEnglishDictWords(identicals)
                    # NOTE(review): strict '>' here versus '>=' above --
                    # confirm which comparison is intended.
                    if len(identicals) > util.THRES_PERSONALDETAILS:
                        heuristics = 'HIT'
                        heuristics_reason = util.tokensToStr(identicals)

                pendingRows.append({
                    'appid': appid,
                    'clientid': clientid,
                    # NOTE(review): this records the results dataset file
                    # name, not the cvd2v path of the row -- confirm.
                    'cvd2vfilename': datasetFilename,
                    'categories': categories,
                    'scores': scores,
                    'heuristics': heuristics,
                    'heuristics_reason': heuristics_reason,
                    'content': cvd2vContent,
                })
                counter = counter + 1
                if counter % 100 == 0:
                    util.logDebug(logTag,
                                  str(counter) + ' files completed with ' + str(errcounter) + ' errors.')
                    util.logDebug(logTag, 'Saving!')
                    self._flushHeuristicRows(pendingRows, heuristicsFilename, filecounter)
                    pendingRows = []

            util.logDebug(logTag, 'Final save!')
            # pendingRows is rebuilt for the next input file, so leftovers of
            # this file are written exactly once and never leak into the
            # next file's CSV.
            self._flushHeuristicRows(pendingRows, heuristicsFilename, filecounter)

# # TestA phase 1xa
# # python3 Evaluator.py '/u01/bigdata/03a_01b_test/cvd2v/test2/heuEval.log' '/u01/bigdata/03a_01b_test/cvd2v/test2/TestA_32_userlabelled_results_1000.csv;/u01/bigdata/03a_01b_test/cvd2v/test2/TestA_32_userlabelled_results_2000.csv;/u01/bigdata/03a_01b_test/cvd2v/test2/TestA_32_userlabelled_results_2759.csv' '/u01/bigdata/03a_01b_test/cvd2v/test2/heuEval.csv' '/u01/bigdata/01b_d2v/032/edu/doc2vecEdu' '/u01/bigdata/01b_d2v/032/skills/doc2vecSkills' '/u01/bigdata/01b_d2v/032/personaldetails/doc2vecPersonalDetails'
# if __name__ == "__main__":
#     if(len(sys.argv)==7):
#         logFile = sys.argv[1]
#         resultsDatasetFilename = sys.argv[2]
#         heuristicsFilename=(sys.argv[3])
#         appd2vEduFolder=(sys.argv[4])
#         appd2vSkillsFolder=sys.argv[5]
#         appd2vPersonalDetailsFolder=sys.argv[6]
#         print('Logging to ', logFile)
#         print('resultsDatasetFilename',resultsDatasetFilename)
#         print('heuristicsFilename', heuristicsFilename)
#         print('appd2vEduFolder', appd2vEduFolder)
#         print('appd2vSkillsFolder', appd2vSkillsFolder)
#         print('appd2vPersonalDetailsFolder', appd2vPersonalDetailsFolder)
#
#         eval=Evaluator(logFile)
#         eval.evaluateHeuristic(resultsDatasetFilename=resultsDatasetFilename, heuristicsFilename=heuristicsFilename, appd2vEduFolder=appd2vEduFolder, appd2vSkillsFolder=appd2vSkillsFolder, appd2vPersonalDetailsFolder=appd2vPersonalDetailsFolder)
#     else:
#         print('Arguments incorrect')

# e=Evaluator()
# e.generateSummary('/u01/bigdata/02d_d2vModel1/features')

Exemplo n.º 21

0

Exibir arquivo

 def __init__(self, logfile=None):
     """Remember the log file location and create a utility that logs to it.

     Args:
         logfile: Stored on ``self._logfile`` and passed to the new
             utility's ``setupLogFileLoc``.
     """
     self._logfile = logfile
     self._util = helper = Utilities.Utility()
     helper.setupLogFileLoc(logfile)