Пример #1
0
 def extractFromJob(self, node, fieldNode):
     """Extract experience requirements from the job text and add them to ``node``.

     First pass: for every ``(start, end)`` range returned by
     ``fieldNode.getExperienceIndex()``, each line of ``self.content`` in
     that range that contains at least one token from
     ``self.getExperienceKeywordList()`` is added to ``self.extractedContent``.
     Scanning of a range stops at a line equal to ``'work experience'``.

     Second pass: every collected line is passed to ``annotate``; each
     returned surface form that starts with a letter becomes an
     ``ExperienceSubNode`` added through ``node.addExperience``. When the
     line contains the token ``'years'`` or ``'year'``, the token together
     with the one preceding it (e.g. ``"5 years"``) is attached as the
     duration; otherwise the duration is ``None``.

     Processing is best-effort: a failure while handling one line is logged
     and the remaining lines are still processed.
     """
     import logging

     # Fetch the keyword list once instead of once per token.
     experienceKeywords = self.getExperienceKeywordList()
     for start, end in fieldNode.getExperienceIndex().items():
         for line in self.content[start:end]:
             if line == 'work experience':
                 # Don't process "work experience", only "experience".
                 break
             # Keep only requirement-like lines, since the section can mix
             # key requirements and skills.
             if any(word in experienceKeywords for word in word_tokenize(line)):
                 self.extractedContent.add(line)

     for line in self.extractedContent:
         wordsInList = word_tokenize(line)
         try:
             annotatedList = [i.get('surfaceForm') for i in annotate(line)]

             # Locate the year token, preferring 'years' over 'year'. Looking
             # each token up only when it is present avoids the ValueError
             # that list.index raises for a missing value.
             yearIndex = None
             for token in ('years', 'year'):
                 if token in wordsInList:
                     yearIndex = wordsInList.index(token)
                     break

             duration = None
             if yearIndex is not None:
                 # Clamp the lower bound so a leading year token does not
                 # produce a negative slice start (and an empty duration).
                 duration = " ".join(wordsInList[max(yearIndex - 1, 0):yearIndex + 1])

             for word in annotatedList:
                 # Skip missing/empty surface forms and those not starting
                 # with a letter.
                 if word and word[0].isalpha():
                     node.addExperience(ExperienceSubNode(word, duration))
         except Exception:
             # Best-effort per line: record the failure instead of hiding it.
             logging.getLogger(__name__).warning(
                 "Failed to extract experience from line %r", line, exc_info=True)
Пример #2
0
 def parse(self, node, fieldNode):
     """Collect skill lines, annotate them and add the resulting skills to ``node``.

     Does nothing when ``fieldNode.getSkillsIndex()`` is empty. Otherwise
     every non-empty line inside the indexed ranges of ``self.content``
     that does not contain ``'page'`` is gathered, the gathered lines are
     joined with newlines and handed to ``annotate``, and each returned
     surface form that is not in ``self.getSkillKeywordList()`` is added
     through ``node.addSkill``.
     """
     if not fieldNode.getSkillsIndex():
         return

     for begin, stop in fieldNode.getSkillsIndex().items():
         for row in self.content[begin:stop]:
             if not row or 'page' in row:
                 continue
             self.extractedContent.add(row)

     joined = "\n".join(self.extractedContent)
     surfaceForms = []
     for annotation in annotate(joined):
         surfaceForms.append(annotation.get('surfaceForm'))
     # The collected lines are replaced by the annotated surface forms.
     self.extractedContent = surfaceForms

     for candidate in surfaceForms:
         if candidate in self.getSkillKeywordList():
             continue
         node.addSkill(candidate)
Пример #3
0
    def parse(self, node, fieldNode):
        """Collect language lines, annotate them and add languages to ``node``.

        Does nothing when ``fieldNode.getLanguageIndex()`` is empty or when
        no lines were collected. Otherwise the lines inside the indexed
        ranges of ``self.content`` are gathered, joined with newlines and
        handed to ``annotate``. An annotation is added through
        ``node.addLanguage`` when its ``'surfaceForm'`` is not in
        ``self.getLanguageKeywordsList()`` and its lower-cased ``'types'``
        value contains ``'language'``.
        """
        if not fieldNode.getLanguageIndex():
            return

        for begin, stop in fieldNode.getLanguageIndex().items():
            for row in self.content[begin:stop]:
                self.extractedContent.add(row)

        if len(self.extractedContent) == 0:
            return

        joined = "\n".join(self.extractedContent)
        # The collected lines are replaced by the raw annotation results.
        self.extractedContent = annotate(joined)
        for annotation in self.extractedContent:
            surface = annotation.get('surfaceForm')
            if surface in self.getLanguageKeywordsList():
                continue
            if 'language' not in annotation.get('types').lower():
                continue
            node.addLanguage(annotation.get('surfaceForm'))
Пример #4
0
 def parse(self, node, fieldNode):
     """Extract skills from the indexed skill sections and register them on ``node``.

     When ``fieldNode.getSkillsIndex()`` is non-empty, the non-empty lines
     of ``self.content`` within each indexed range that do not contain
     ``'page'`` are collected, joined by newlines and passed to
     ``annotate``. Surface forms not present in
     ``self.getSkillKeywordList()`` are passed to ``node.addSkill``.
     """
     if not bool(fieldNode.getSkillsIndex()):
         return

     for first, last in fieldNode.getSkillsIndex().items():
         section = self.content[first:last]
         for text in section:
             keep = text and 'page' not in text
             if keep:
                 self.extractedContent.add(text)

     sectionText = "\n".join(self.extractedContent)
     annotations = annotate(sectionText)
     # From here on extractedContent holds surface forms, not raw lines.
     self.extractedContent = [item.get('surfaceForm') for item in annotations]

     for skill in self.extractedContent:
         isKeyword = skill in self.getSkillKeywordList()
         if not isKeyword:
             node.addSkill(skill)
Пример #5
0
    def parse(self, node, fieldNode):
        """Extract languages from the indexed language sections onto ``node``.

        When ``fieldNode.getLanguageIndex()`` is non-empty, the lines of
        ``self.content`` within each indexed range are collected. If any
        were collected they are joined by newlines and passed to
        ``annotate``; every annotation whose ``'surfaceForm'`` is not in
        ``self.getLanguageKeywordsList()`` and whose lower-cased ``'types'``
        contains ``'language'`` is passed to ``node.addLanguage``.
        """
        if not bool(fieldNode.getLanguageIndex()):
            return

        for first, last in fieldNode.getLanguageIndex().items():
            for text in self.content[first:last]:
                self.extractedContent.add(text)

        sectionText = "\n".join(self.extractedContent)
        if not len(self.extractedContent) > 0:
            return

        # From here on extractedContent holds annotation results, not lines.
        self.extractedContent = annotate(sectionText)
        for item in self.extractedContent:
            isKeyword = item.get('surfaceForm') in self.getLanguageKeywordsList()
            if isKeyword:
                continue
            typesText = item.get('types').lower()
            if 'language' in typesText:
                node.addLanguage(item.get('surfaceForm'))
Пример #6
0
 def extractEducationFromJob(self, node, fieldNode):
     """Extract education requirements from the job text and add them to ``node``.

     First pass: for every ``(start, end)`` range returned by
     ``fieldNode.getEducationIndex()``, each line of ``self.content`` in
     that range containing at least one token from
     ``self.getEducationLevel()`` is added to ``self.extractedContent``.

     Second pass: the collected lines are joined with newlines and passed
     to ``annotate``; ``self.extractedContent`` is replaced by the returned
     surface forms. Walking them in order, a surface form that is an
     education level becomes the current level, and any other non-ignored
     surface form is added via ``node.addEducation`` as
     ``"<level> in <surface form>"``. Surface forms seen before any level
     has been found are skipped.

     Processing is best-effort: a failure during annotation or while adding
     entries is logged and the method returns normally.
     """
     import logging

     # Fetch the lookup lists once instead of on every iteration.
     educationLevels = self.getEducationLevel()
     for start, end in fieldNode.getEducationIndex().items():
         for line in self.content[start:end]:
             if any(word in educationLevels for word in word_tokenize(line)):
                 self.extractedContent.add(line)

     listString = "\n".join(self.extractedContent)
     try:
         self.extractedContent = [i.get('surfaceForm') for i in annotate(listString)]
         ignoredKeywords = self.getIgnoredKeywords()
         # No level is known until one is encountered; initialising here
         # avoids reading an unbound name when a subject precedes any level.
         educationType = None
         for line in self.extractedContent:
             if not line or line in ignoredKeywords:
                 continue
             if line in educationLevels:
                 educationType = line
             elif educationType is not None:
                 node.addEducation(educationType + " in " + line, None)
     except Exception:
         # Best-effort: record the failure instead of silently hiding it.
         logging.getLogger(__name__).warning(
             "Failed to extract education from job text", exc_info=True)