def extractInfo(sent):
    """Label a sentence and gather the output of each matcher group that hits.

    The sentence is split on whitespace, wrapped in a ``JobSentence`` and
    labeled by the module-level ``labeler``.  The labeled array is then run
    against the degree, major and prefer matcher groups, in that order; the
    ``output()`` of every non-``None`` match is appended to the result list.

    Returns a ``(degreeSent, degreeModel)`` pair: the labeled sentence object
    and the list of collected matcher outputs (empty when nothing matched).
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)

    collected = []

    def _collect(matcher_group):
        # Try one group of matchers; keep its output only when it matched.
        hit = matchSent(matcher_group, labeled)
        if hit is not None:
            collected.extend(hit.output())

    _collect(degree_patterns.degree_matchers)
    _collect(major_patterns.major_matchers)
    _collect(prefer_patterns.prefer_matchers)

    return sentence, collected
def processTitle(title):
    """Lower-case and label a job title, then match it against ``matchers``.

    The title is lower-cased, split on whitespace, wrapped in a
    ``JobSentence`` and labeled by the module-level ``labeler``.  Returns
    whatever ``matchSent`` returns for the module-level ``matchers`` and the
    resulting labeled array.
    """
    tokens = title.lower().split()
    sentence = JobSentence(tokens)
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    return matchSent(matchers, labeled)
def getLabeledSentence(data_set_name, outfileName):
    """Label every item of a JSON data set and report the degree-pattern match rate.

    Each item is indexed as a sequence: ``item[2]`` is split on whitespace and
    labeled with the degree grammar, and ``item[0]`` is printed and written to
    the output file (presumably an identifier and the sentence text
    respectively -- confirm against the data set).

    Args:
        data_set_name: passed to ``datautils.loadJson`` to load the items.
        outfileName: path of the text file that receives one ``item[0]``
            entry (followed by a blank line) per item.

    Prints the label array and match result per item, then a final summary
    line ``match rate = <matched>/<total>= <rate>``.  Returns ``None``.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # DE_LEVEL (, DE_LEVEL)* (OR DE_LEVEL)? DEGREE
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)

    matchSum = 0
    # The context manager guarantees the file is flushed and closed, even if
    # labeling or matching raises part-way through the data set.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)
            array = [x[0] for x in degreeSent.getLabeledArray()]
            print("%s :  %s" % (item[0], array))

            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1

            # NOTE(review): the flattened source does not show whether this
            # write belongs inside ``if match``; it is done for every item
            # here -- confirm against the intended output.
            f.write(item[0] + "\n\n")
            # Return value is not used; the call is kept in case it has
            # side effects (e.g. printing) -- TODO confirm.
            degreeSent.printLabeledArray()

    total = len(data)
    # float() forces true division (plain ``/`` truncates to 0 on Python 2
    # ints); an empty data set reports 0.0 instead of dividing by zero.
    rate = float(matchSum) / total if total else 0.0
    print("match rate = %d/%d= %s" % (matchSum, total, rate))
def getLabeledSentence(data_set_name, outfileName):
    """Label every item of a JSON data set and report the degree-pattern match rate.

    Each item is indexed as a sequence: ``item[2]`` is split on whitespace and
    labeled with the degree grammar, and ``item[0]`` is printed and written to
    the output file (presumably an identifier and the sentence text
    respectively -- confirm against the data set).

    Args:
        data_set_name: passed to ``datautils.loadJson`` to load the items.
        outfileName: path of the text file that receives one ``item[0]``
            entry (followed by a blank line) per item.

    Prints the label array and match result per item, then a final summary
    line ``match rate = <matched>/<total>= <rate>``.  Returns ``None``.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # DE_LEVEL (, DE_LEVEL)* (OR DE_LEVEL)? DEGREE
    pattern1 = [
        "DE_LEVEL",
        StarRepetition([",", "DE_LEVEL"]),
        QuestionRepetition(["OR", "DE_LEVEL"]),
        "DEGREE",
    ]
    fst = TokenRegex(pattern1)

    matchSum = 0
    # The context manager guarantees the file is flushed and closed, even if
    # labeling or matching raises part-way through the data set.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)
            array = [x[0] for x in degreeSent.getLabeledArray()]
            print("%s :  %s" % (item[0], array))

            match = fst.match(array)
            print("match= %s" % (match,))
            if match:
                matchSum += 1

            # NOTE(review): the flattened source does not show whether this
            # write belongs inside ``if match``; it is done for every item
            # here -- confirm against the intended output.
            f.write(item[0] + "\n\n")
            # Return value is not used; the call is kept in case it has
            # side effects (e.g. printing) -- TODO confirm.
            degreeSent.printLabeledArray()

    total = len(data)
    # float() forces true division (plain ``/`` truncates to 0 on Python 2
    # ints); an empty data set reports 0.0 instead of dividing by zero.
    rate = float(matchSum) / total if total else 0.0
    print("match rate = %d/%d= %s" % (matchSum, total, rate))
def labelSent(labeler, matcher, sent):
    """Label a sentence with ``labeler`` and look it up with ``matcher``.

    ``sent`` is split on whitespace and wrapped in a ``JobSentence``, which
    ``labeler`` then labels.  The labeled array (built with
    ``labeler.ontoDict``) is handed to ``matcher.findMatching``.

    Returns a pair: the value returned by ``findMatching`` and the labeled
    sentence object.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    found = matcher.findMatching(labeled)
    return found, sentence
def labelSentByMatchers(matchers, sent):
    """Label a sentence and match it against the given matcher collection.

    ``sent`` is split on whitespace, wrapped in a ``JobSentence`` and labeled
    by the module-level ``labeler``; the resulting labeled array is passed to
    ``matchSent`` together with ``matchers``.

    Returns a pair: the labeled sentence object and the ``matchSent`` result.
    """
    sentence = JobSentence(sent.split())
    labeler.labelSentence(sentence)
    labeled = sentence.getLabeledArray(labeler.ontoDict)
    result = matchSent(matchers, labeled)
    return sentence, result
def labelSent(sent):
    """Tag a sentence, wrap it in a ``JobSentence`` and label it.

    ``tagSentence`` supplies two values (used here as tokens and POS tags)
    that are both passed to ``JobSentence``; the module-level ``labeler``
    then labels the sentence in place.

    Returns the labeled ``JobSentence``.
    """
    words, tags = tagSentence(sent)
    sentence = JobSentence(words, tags)
    labeler.labelSentence(sentence)
    return sentence
def labelDegreeSet(data_set_name, outfileName):
    """Label every item of a JSON data set and dump the result tables to a file.

    For each item, ``item[2]`` is split on whitespace, wrapped in a
    ``JobSentence`` and labeled with the degree grammar.  ``item[0]`` is
    printed and written to the output file, followed by the string form of
    the table returned by ``printSentenct()``.

    Args:
        data_set_name: passed to ``datautils.loadJson`` to load the items.
        outfileName: path of the text file to (over)write.

    Returns ``None``.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # The context manager guarantees the file is flushed and closed, even if
    # labeling raises part-way through the data set.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)

            print(item[0])
            f.write(item[0] + "\n\n")
            table = degreeSent.printSentenct()
            f.write(table.get_string() + "\n\n")
def labelDegreeSet(data_set_name, outfileName):
    """Label every item of a JSON data set and dump the result tables to a file.

    For each item, ``item[2]`` is split on whitespace, wrapped in a
    ``JobSentence`` and labeled with the degree grammar.  ``item[0]`` is
    printed and written to the output file, followed by the string form of
    the table returned by ``printSentenct()``.

    Args:
        data_set_name: passed to ``datautils.loadJson`` to load the items.
        outfileName: path of the text file to (over)write.

    Returns ``None``.
    """
    labelGrammer = createDegreeGrammar()
    data = datautils.loadJson(data_set_name)

    # The context manager guarantees the file is flushed and closed, even if
    # labeling raises part-way through the data set.
    with open(outfileName, "w") as f:
        for item in data:
            degreeSent = JobSentence(item[2].split())
            labelGrammer.labelSentence(degreeSent)

            print(item[0])
            f.write(item[0] + "\n\n")
            table = degreeSent.printSentenct()
            f.write(table.get_string() + "\n\n")
def labelDegree():
    """Label one hard-coded sample sentence with the degree grammar.

    A catalogue of sample degree-requirement sentences is kept here for
    manual experimentation; only the sixth one is actually labeled.  The
    sentence is split on whitespace, wrapped in a ``JobSentence``, labeled
    by a freshly created degree grammar, and ``printSentenct()`` is called on
    it (its return value is discarded).  Returns ``None``.
    """
    samples = (
        "bachelors degree",
        "bachelors Degree preferred",
        "Bachelors Degree or Equivalent",
        "bachelors degree in Computer Science",
        "bachelors degree in Computer Science or equivalent",
        "B.S. degree in Computer Science required",
        "Requires a Bachelors degree in Information Systems or related field",
        "Bachelors degree in computer science or an equivalent combination of education and/or experience",
        "bachelors degree in related field , OR four ( 4 ) years of experience in a directly related field",
        "Bachelors or master degree in computer science",
        "Bachelor , Master or Doctorate of Science degree from an accredited course of study , in engineering , computer science , mathematics , physics or chemistry",
    )
    # Index 5 is the "B.S. degree ..." sample; change it to try another one.
    chosen = samples[5]

    grammar = createDegreeGrammar()
    sentence = JobSentence(chosen.split())
    grammar.labelSentence(sentence)
    sentence.printSentenct()