Exemplo n.º 1
0
 def get_document_type(self, dictname):
     """Return the document type of the dictionary text named *dictname*.

     The file ``../dictionary/text/<dictname>.txt`` is handed to a fresh
     ``LawDocument`` for analysis, and that object's ``document_type``
     attribute is returned unchanged.
     """
     text_dir = "../dictionary/text/"
     source_path = text_dir + dictname + ".txt"

     parsed = LawDocument()
     parsed.analyze(filename=source_path)
     return parsed.document_type
Exemplo n.º 2
0
 def get_document(self, dictname):
     """Return the title lines of the dictionary text named *dictname*.

     The file ``../dictionary/text/<dictname>.txt`` is analysed with a fresh
     ``LawDocument``; the entries of its ``document_title`` attribute are
     joined with newlines and returned as a single string.
     """
     text_dir = "../dictionary/text/"
     source_path = text_dir + dictname + ".txt"

     parsed = LawDocument()
     parsed.analyze(filename=source_path)

     title_lines = parsed.document_title
     return "\n".join(title_lines)
Exemplo n.º 3
0
 def get_document_chapiter(self, sims, dictname):
     """Render the best-matching chapters of a dictionary text as one string.

     ``sims`` is iterated in order; each item is read at index 0 (the number
     passed to ``LawDocument.get_document_chapiter``) and index 1 (its
     similarity). Iteration stops at the first item whose similarity is not
     above 0.4, or once two chapters have been emitted, whichever comes
     first. Each emitted chapter is preceded by a numbered banner line.

     Returns an empty string when the first item already fails the
     threshold or ``sims`` is empty.
     """
     text_dir = "../dictionary/text/"
     source_path = text_dir + dictname + ".txt"

     parsed = LawDocument()
     parsed.analyze(filename=source_path)

     pieces = []
     emitted = 0
     for sim in sims:
         doc_no = sim[0]
         score = sim[1]

         # Presumably sims is sorted by descending similarity, so the first
         # weak match ends the scan -- verify against the caller.
         if not (score > 0.4):
             break

         pieces.append("******** " + str(emitted + 1) + "  ********\n")
         pieces.append(parsed.get_document_chapiter(doc_no) + "\n")
         emitted += 1

         # Never show more than two chapters.
         if emitted >= 2:
             break

     return "".join(pieces)
Exemplo n.º 4
0
    def search_document(self, textpath, filename):
        """Return the document type of the best-scoring dictionary.

        The file ``filename`` is analysed with ``LawDocument``. For every
        ``*.dict`` entry in ``../dictionary/dict`` a score is accumulated by
        running ``self.text_search_lsi(textpath, sentence[1])`` over each
        entry of the analysed document's ``table_contents`` that has more
        than one element, and summing ``self.get_similarity_value`` of the
        results. The dictionary with the highest total (first one wins on
        ties) is passed to ``self.get_document_type``.

        Raises:
            IndexError: if the dictionary directory holds no ``.dict`` file.
        """
        dict_dir = "../dictionary/dict"
        suffix = ".dict"

        ld = LawDocument()
        ld.analyze(filename=filename)

        doc_tab = []
        # A distinct loop name keeps the ``filename`` parameter intact.
        for entry in os.listdir(dict_dir):
            if not entry.endswith(suffix):
                continue

            # Strip only the trailing extension; str.replace would also eat
            # an inner ".dict" (e.g. "my.dictionary.dict" -> "myionary").
            dictname = entry[:-len(suffix)]

            # NOTE(review): nothing below uses ``dictname``, so every
            # dictionary appears to receive the same total unless
            # text_search_lsi depends on state not visible here -- confirm
            # whether the dictionary should be passed to the search.
            total = 0.0
            for sentence in ld.table_contents:
                if len(sentence) > 1:
                    sims = self.text_search_lsi(textpath, sentence[1])
                    total += self.get_similarity_value(sims)

            doc_tab.append([dictname, total])

        if not doc_tab:
            # Same exception type the original raised via doc_tab[0][0],
            # but with an explanation attached.
            raise IndexError("no '.dict' files found in " + dict_dir)

        # max() returns the first maximal row, matching a stable
        # descending sort followed by taking element 0.
        best = max(doc_tab, key=lambda row: row[1])
        return self.get_document_type(best[0])