Example #1
# Assumes the jobaly project helpers OntologyLib, DbClient, JobDescParser
# and dumpTwo are importable in this module.
def getSentsByOntology():
    # Load the web-development ontology and build a lower-cased term list,
    # padded with spaces so that substring matching only hits whole words.
    owlfile = r"..\..\jobaly\ontology\web_dev.owl"  # raw string keeps the backslashes literal
    ontology = OntologyLib(owlfile)
    terms = [" " + x.lower() + " " for x in ontology.getLabelList()]
    terms.extend([" " + x.lower() + " " for x in ontology.getAllClassNames()])

    srcBbClient = DbClient('localhost', 27017, "jobaly_daily_test")
    collection = srcBbClient.getCollection("daily_job_webdev")

    matchingSents = []
    for job in collection.find():
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = jobDesc.listAllSentences()
        jid = job["_id"]
        for sent in sents:
            c = 0
            sent = " " + sent.lower() + " "
            for term in terms:
                if sent.find(term) != -1:
                    c += 1
                if c == 3:  # keep sentences that mention at least three ontology terms
                    print sent.encode("GBK", "ignore")
                    matchingSents.append((jid, sent))
                    break

    # Shortest matches first, then dump "<job id>:<sentence>" lines.
    sortedsents = sorted(matchingSents, key=lambda x: len(x[1]))
    dumpTwo(sortedsents, "term3", lambda x: x[0] + ":" + x[1])
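dumpTwo is a jobaly helper whose definition is not shown in these snippets; from its call sites it evidently writes one formatted line per (job id, sentence) pair to an output file. A minimal sketch of such a helper, assuming the signature used above (the file handling and encoding are guesses, not the project's actual code):

def dumpTwo(pairs, outputPath, formatLine):
    # Hypothetical stand-in: write one formatted line per pair,
    # GBK-encoded like the prints in the surrounding code.
    out = open(outputPath, 'w')
    for pair in pairs:
        out.write(formatLine(pair).encode("GBK", "ignore") + "\n")
    out.close()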
Example #2
import json
from nltk.tokenize import word_tokenize

def getDisMatrixFromColletion():
    srcBbClient = DbClient('localhost', 27017, "jobaly_daily_test")
    collection = srcBbClient.getCollection("daily_job_webdev")
    f = open('sents.txt', 'w')  # Python converts \n to os.linesep on write

    # Build one tokenized document per job posting, logging every sentence.
    docs = []
    for job in collection.find():
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = jobDesc.listAllSentences()
        doc = []
        for sent in sents:
            f.write(sent.encode("GBK", "ignore") + "\n")
            tokens = [token.lower() for token in word_tokenize(sent)]
            doc.extend(tokens)
        docs.append(doc)
    f.close()

    terms = ["javascript", "jquery", "html", "css", "java",
             "python", "ruby", "mysql", "jdbc", "cpp"]
    matrix = getDistanceMatrix(docs, terms)
    printDisMatrix(terms, matrix)
    matrix_dump = json.dumps(matrix)
    print matrix_dump
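getDistanceMatrix and printDisMatrix are also project helpers that are not shown here. Given the inputs (tokenized documents plus a term list) and the fact that the result survives json.dumps, one plausible reading is a symmetric matrix of term-to-term distances. A sketch under that assumption, using one minus the Jaccard similarity of the sets of documents in which two terms occur (not the project's actual implementation):

def getDistanceMatrix(docs, terms):
    # For each term, the set of document indices containing it.
    docSets = dict((t, set(i for i, doc in enumerate(docs) if t in doc))
                   for t in terms)
    matrix = []
    for a in terms:
        row = []
        for b in terms:
            union = docSets[a] | docSets[b]
            inter = docSets[a] & docSets[b]
            if union:
                row.append(1.0 - float(len(inter)) / len(union))
            else:
                row.append(0.0)  # neither term occurs in any document
        matrix.append(row)
    return matrix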
Example #3
def getAllSentsInColl(collection):
    # Collect (job id, sentence) pairs across every job in the collection.
    allSents = []
    for job in collection.find():
        print "\n\n\n======", job["_id"], "============================\n"
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = [(jobDesc._id, sent) for sent in jobDesc.listAllSentences()]
        allSents.extend(sents)
    return allSents
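A minimal driver for getAllSentsInColl, assuming the DbClient settings used in the other snippets:

client = DbClient('localhost', 27017, "jobaly_daily_test")
coll = client.getCollection("daily_job_webdev")
allSents = getAllSentsInColl(coll)
print len(allSents)  # total number of (job id, sentence) pairs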
Example #4
def preprocess(job):
    # Parse one job document and run processLine over each of its sentences.
    jobDesc = JobDescParser.parseJobDesc(job)
    sents = jobDesc.listAllSentences()
    return [processLine(line) for line in sents]
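processLine is defined elsewhere in the project. A hypothetical stand-in that lower-cases a sentence and collapses whitespace, just to make the example self-contained (the real implementation may do more, such as tokenization or stop-word removal):

import re

def processLine(line):
    # Hypothetical stand-in, not the project's actual processLine.
    return re.sub(r"\s+", " ", line.lower()).strip()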
Example #5
def getSentenceByTerm(collection, term, outputPath):
    # Collect every sentence whose token list contains the given term.
    matchingSents = []
    for job in collection.find():
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = jobDesc.listAllSentences()
        jid = job["_id"]
        for sent in sents:
            tokens = [token.lower() for token in word_tokenize(sent)]
            if term in tokens:
                matchingSents.append((jid, sent))
                print sent.encode("GBK", "ignore")

    # Shortest matches first, then dump "<job id>:<sentence>" lines.
    sortedsents = sorted(matchingSents, key=lambda x: len(x[1]))
    dumpTwo(sortedsents, outputPath, lambda x: x[0] + ":" + x[1])
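An example call, again assuming the collection setup used throughout these snippets; the term and output path are arbitrary:

client = DbClient('localhost', 27017, "jobaly_daily_test")
coll = client.getCollection("daily_job_webdev")
getSentenceByTerm(coll, "python", r"..\skill\output\python")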
Example #6
def getJavaScipt():
    srcBbClient = DbClient('localhost', 27017, "jobaly_daily_test")
    collection = srcBbClient.getCollection("daily_job_webdev")

    term = "javascript"
    matchingSents = []
    for job in collection.find():
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = jobDesc.listAllSentences()
        jid = job["_id"]
        for sent in sents:
            tokens = [token.lower() for token in word_tokenize(sent)]
            if term in tokens:
                matchingSents.append((jid, sent))
                print sent.encode("GBK", "ignore")

    sortedsents = sorted(matchingSents, key=lambda x: len(x[1]))
    # Raw string for the Windows path; the original relied on \s, \o and \j
    # not being escape sequences.
    dumpTwo(sortedsents, r"..\skill\output\javascript", lambda x: x[0] + ":" + x[1])
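Note that this function is simply Example #5's getSentenceByTerm with the collection, term, and output path hard-coded; the equivalent call would be:

getSentenceByTerm(collection, "javascript", r"..\skill\output\javascript")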
Example #7
def createDocs():
    srcBbClient = DbClient('localhost', 27017, "jobaly_daily_test")
    collection = srcBbClient.getCollection("daily_job_webdev")
    maxnum = 99999  # safety cap on the number of jobs processed
    docs = []
    i = 0
    for job in collection.find():
        i += 1
        if i == maxnum:
            break
        jobDesc = JobDescParser.parseJobDesc(job)
        sents = jobDesc.listAllSentences()

        doc = []
        for sent in sents:
            tokens = [token.lower() for token in word_tokenize(sent)]
            doc.extend(tokens)
        docs.append(doc)

    return docs
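createDocs builds the same tokenized-documents structure that getDisMatrixFromColletion (Example #2) consumes, so the two compose naturally:

docs = createDocs()
terms = ["javascript", "jquery", "html", "css"]
matrix = getDistanceMatrix(docs, terms)
printDisMatrix(terms, matrix)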