-
Notifications
You must be signed in to change notification settings - Fork 0
/
SkillsParser.py
36 lines (33 loc) · 1.48 KB
/
SkillsParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
__author__ = 'Owner'
from nltk import word_tokenize
from IParser import IParser
from InformationNode import InformationNode
from ResumeNode import ResumeNode
from JobDescNode import JobDescNode
from FieldsIndexNode import FieldsIndexNode
import Rake
from DBpediaSpotlight import annotate
class SkillsParser(IParser):
    """Extract skills from the skills section(s) of a document.

    The raw text is converted with ``InformationNode.convertStringIntoList``
    and sliced using the ranges reported by the field node's
    ``getSkillsIndex()``. The selected lines are sent to ``annotate`` and the
    returned surface forms are added to the target node as skills.
    """

    def __init__(self, input):
        """Prepare the parser for one document.

        Args:
            input: Raw document text; passed unchanged to
                ``InformationNode.convertStringIntoList``.
        """
        # Sliced by parse(); presumably a list of lines - TODO confirm
        # against InformationNode.convertStringIntoList.
        self.content = InformationNode.convertStringIntoList(input)
        # Starts as an empty set; parse() replaces it with the list of
        # surface forms returned by the annotator.
        self.extractedContent = set()

    def parse(self, node, fieldNode):
        """Annotate the skills section(s) and add each result to ``node``.

        Args:
            node: Object exposing ``addSkill(value)``; receives every
                annotated surface form that is not a section keyword.
            fieldNode: Object exposing ``getSkillsIndex()``, which must return
                a mapping whose items are ``(start, end)`` slice bounds into
                ``self.content``.

        Side effects:
            ``self.extractedContent`` is rebound to the list of surface forms
            found by this call (an empty list when no line qualified). It is
            left untouched when the skills index is empty.
        """
        skillsIndex = fieldNode.getSkillsIndex()
        if not skillsIndex:
            return

        # Lines are gathered in a local container so that calling parse()
        # again does not try to .add() onto the list stored by a prior call.
        candidateLines = self._collectSkillLines(skillsIndex)
        if not candidateLines:
            # Nothing to annotate: avoid sending an empty document.
            self.extractedContent = []
            return

        annotations = annotate("\n".join(candidateLines))
        self.extractedContent = [item.get('surfaceForm') for item in annotations]

        # Built once instead of on every iteration.
        # NOTE(review): the comparison is case-sensitive, so a capitalised
        # heading such as "Skills" is not filtered out - confirm intent.
        sectionKeywords = self.getSkillKeywordList()
        for surfaceForm in self.extractedContent:
            if surfaceForm not in sectionKeywords:
                node.addSkill(surfaceForm)

    def _collectSkillLines(self, skillsIndex):
        """Return the distinct, non-empty skill lines in document order.

        Lines containing the substring ``'page'`` are skipped. Order is
        preserved so the text sent to the annotator is the same on every run.
        """
        seen = set()
        orderedLines = []
        for start, end in skillsIndex.items():
            for line in self.content[start:end]:
                if line and 'page' not in line and line not in seen:
                    seen.add(line)
                    orderedLines.append(line)
        return orderedLines

    def getSkillKeywordList(self):
        """Return the section-heading words that are never stored as skills."""
        return ['skill', 'skills', 'expertise', 'proficiency', 'technical',
                'qualification', 'qualifications', 'responsibilities']