Example #1
import pickle  # standard library; CLECReader and EssayPassage come from the host project

def simlarityTest():
    pkfile = open('rater.pkl', 'rb')  # pickled rater; binary mode for pickle.load
    rater = pickle.load(pkfile)
    pkfile.close()

    essays = CLECReader.parseCLECFile2('clecst/ST3.txt')
    print len(essays)

    essayDict = {}

    for e in essays:
        essayDict.setdefault(e.title, []).append(e)

    print essayDict.keys()

    for k, v in essayDict.items():
        print len(v), k

    passages = []
    count = 0
    for e in essayDict['Global Shortage of Fresh Water'][:120]:
        count += 1
        newpassage = EssayPassage()
        newpassage.passage = e.content
        newpassage.id = str(count)
        newpassage.score = e.score
        newpassage.processStatus = 0
        passages.append(newpassage)
        rater.rate(newpassage)

    for p in passages:
        print p.score, p.rateScore, p.lsaSimilarity, p.lsaSimilarityAll

    print "OK"
Example #2
import json  # standard library; the essay-processing names below come from the host project

def do_task(task):
    newpassage = EssayPassage()
    newpassage.passage = task['input']['content']
    newpassage.orderId = task['id']
    newpassage.score = 0
    newpassage.processStatus = 0
    try:
        # prepare the passage (tokenisation plus the spell/grammar annotations used below)
        essayprepare.processPassage(newpassage, fn_prepare_progress)
        fe = extractor.FeatherExtractor()
        lf = fe.extractLangFeather(newpassage)
        newpassage.lf = lf
        cf = fe.extractContentFeather(newpassage)
        newpassage.cf = cf
        sf = fe.extractStructureFeather(newpassage) 
        newpassage.sf = sf
        # rate_by_params returns a sequence; its first element is the overall score
        newpassage.score = rater.rate_by_params(newpassage)[0]
    except Exception:
        task['progress'] = -2
        task['status'] = 'TUTERR'
        task['output'] = ""
        task['simple_output'] = ""
        task['detail_output'] = ""
        commit_task(task)
        return

    # build the final result
    output = {}
    passage = {}
    passage['score'] = newpassage.score
    passage['token_count'] = lf.tokenCount
    passage['word_count'] = lf.wordCount
    passage['word_type_count'] = lf.wordTypeCount
    passage['word_lemma_count'] = lf.wordLemmaCount
    passage['word_stem_count'] = lf.wordStemCount
    passage['average_word_length'] = lf.wordLengthAverage
    passage['average_sentence_length'] = lf.sentenceLengthAverage
    passage['overly_use_word_count'] = lf.overlyUseWordCount
    passage['paragraph_count'] = len(newpassage.paragraphs)
    passage['sentence_count'] = newpassage.sentenceCount
    passage['sentences'] = []
    for para in newpassage.paragraphs:
        for sent in para.sentences:
            sentence = {}
            sentence['no'] = sent.sentenceNo
            sentence['para_no'] = para.paragraphNo
            sentence['original'] = sent.sentence
            sentence['score'] = 0
            spell_errors = []
            fs = []
            for token in sent.tokens:
                if token.isSpellError:
                    fs.append('<ESP>' + token.token + '</ESP>')
                    spell_error = {}
                    spell_error['token'] = token.token
                    spell_error['lemma'] = token.lemma
                    spell_error['suggest'] = token.candidates
                    spell_error['start_at'] = token.startAt
                    spell_error['end_at'] = token.endAt
                    spell_errors.append(spell_error)
                else:
                    fs.append(token.token)
            sentence['spell_errors'] = spell_errors
            sentence['marked'] = ' '.join(fs)
            sentence['lt_result'] = sent.ltCheckResults   
            sentence['lg_result'] = sent.lgCheckResults
            sentence['links'] = []
            passage['sentences'].append(sentence)
           
    output['passage'] = passage
    task['progress'] = 100
    task['status'] = 'DONE'
    task['output'] = json.dumps(output)
    task['simple_output'] = json.dumps(output)    
    task['detail_output'] = json.dumps(generate_detail_output(newpassage))   
        
    commit_task(task)
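do_task reads only task['id'] and task['input']['content'], then fills the progress, status and output fields before handing the dict back to commit_task. A minimal sketch of how a caller might drive it (the field values here are invented for illustration; commit_task and the essay-processing modules come from the host project):

task = {
    'id': 42,                                     # becomes newpassage.orderId
    'input': {'content': 'Fresh water is becoming scarce ...'},
}
do_task(task)
# on success: status 'DONE', progress 100, the three output fields hold JSON
# on failure: status 'TUTERR', progress -2, the three output fields are empty
print task['status'], task['progress']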
Example #3
# request loop of the scoring server: SUBMIT queues an essay for rating, QUERY polls its status
# (socket, rater, orderId, waitingPassages and donePassages are set up before this excerpt)
passage = None
while True:
    request = socket.recv()
    print request
    try:
        rs = json.loads(request)
    except Exception:
        socket.send("")
        continue
    if rs['ACTION'] == 'SUBMIT':
        orderId += 1
        newpassage = EssayPassage()
        newpassage.passage = rs['text']
        newpassage.orderId = orderId
        newpassage.score = 0
        newpassage.processStatus = 0
        waitingPassages.append(newpassage)
        if ((not passage) or passage.rated) and len(waitingPassages) > 0:
            passage = waitingPassages.pop(0)
            donePassages[passage.orderId] = passage
            rthread = RatePassageThread(rater, passage)
            rthread.start()
        reply = json.dumps({'orderId':orderId, 'progress':0, 'rated':0})
        socket.send_unicode(reply)
    elif rs['ACTION'] == 'QUERY':
        oId = int(rs['orderId'])
        if oId not in donePassages:
            reply = json.dumps({'orderId':oId, 'progress':0, 'rated':0})
            socket.send_unicode(reply)
        else:
            if passage.rated:
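The listing stops inside the QUERY branch. A minimal sketch of one way that branch could reply, assuming the queried passage exposes the same rated, processStatus and rateScore attributes used in the first example; the original continuation is not shown, so the reply fields are modelled on the SUBMIT reply above:

        else:
            done = donePassages[oId]
            if done.rated:
                # rating finished: report the rater's score
                reply = json.dumps({'orderId': oId, 'progress': 100,
                                    'rated': 1, 'score': done.rateScore})
            else:
                # still being rated: echo whatever progress has been recorded
                reply = json.dumps({'orderId': oId,
                                    'progress': done.processStatus, 'rated': 0})
            socket.send_unicode(reply)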