def read_file(url, doc_ID, index):
    """Parse the markup file at ``url`` and hand its text to ``tokenizer``.

    Despite its name, ``url`` is opened with the builtin ``open`` and is
    therefore treated as a local file path.

    Three pieces of text are collected, in this order: the first ``<p>``
    element, the ``<meta name="keywords">`` element and the first
    ``<title>`` element. They are passed to ``tokenizer`` together with a
    fresh ``Doc`` (tagged with ``doc_ID``) and ``index``.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if any of the three elements is missing.
    """
    doc = Doc()
    doc.doc_ID = doc_ID

    # The parser consumes the handle inside the constructor, so the file can
    # be closed as soon as the soup is built.
    with open(url, 'r') as file:
        page = BeautifulSoup(file, 'xml')

    # ``name`` is the first positional parameter of find_all (the tag name),
    # so the HTML attribute called "name" has to be passed through ``attrs``.
    keywords_tag = page.find_all('meta', attrs={'name': 'keywords'})[0]

    content = [
        page.find_all('p')[0].get_text(),
        # Keywords are carried by the ``content`` attribute of the meta tag;
        # fall back to the element text when that attribute is absent.
        keywords_tag.get('content') or keywords_tag.get_text(),
        page.find_all('title')[0].get_text(),
    ]
    tokenizer(content, doc, index)
Exemplo n.º 2
0
    def open(self):
        """Ask the user for a file, load it into a ``Doc`` and build the UI.

        The notes file name is derived from the chosen file's base name
        (``notes-<name>.txt``) and the file is created empty when it does
        not exist yet. If the dialog is cancelled nothing happens, so the
        upload button stays available for another attempt.
        """
        filename = QFileDialog.getOpenFileName(self, 'Open File', '.')[0]
        if not filename:
            # Cancelled dialog: an empty path must not reach Doc().
            return

        self.notename = "notes-"+filename.split("/")[-1].split(".")[0]+".txt"
        self.uploadButton.hide()
        print(filename)
        self.doc = Doc(filename)

        if not os.path.exists(self.notename):
            # Inside a method body ``open`` resolves to the builtin, not to
            # this method.
            open(self.notename, "w").close()

        self.construct()
Exemplo n.º 3
0
 def buscar(self, dni_, opc):
     """Look up a row of ``abonados`` by id and dispatch on ``opc``.

     When no row matches, a message is printed and ``menu.principal()``
     followed by ``dnierror.adv()`` is called. Otherwise:

     * ``opc == 1`` passes the first six columns to ``buscar.busqueda``;
     * ``opc == 3`` passes the first six columns to ``Doc.tool``;
     * any other value passes the first three columns to
       ``borrar.eliminar``.
     """
     # NOTE(review): the query is built by string concatenation, so ``dni_``
     # can inject SQL. Switch to the driver's parameter placeholders once the
     # DB-API paramstyle of ``self.cursor`` is confirmed.
     self.cursor.execute("select * from abonados where id='" + str(dni_) + "'")
     # Fetch the single matching row (or None).
     resultado = self.cursor.fetchone()
     if resultado is None:
         print("DNI no encontrado")
         menu.principal()
         dnierror.adv()
     # One elif chain: previously ``opc == 1`` also fell into the final
     # ``else`` and triggered the delete branch after the search.
     elif opc == 1:
         buscar.busqueda(resultado[0], resultado[1], resultado[2],
                         resultado[3], resultado[4], resultado[5])
     elif opc == 3:
         Doc.tool(resultado[0], resultado[1], resultado[2],
                  resultado[3], resultado[4], resultado[5])
     else:
         borrar.eliminar(resultado[0], resultado[1], resultado[2])
Exemplo n.º 4
0
    def Parse(self):
        """Walk the comment lines and collect one ``Doc`` per comment block.

        Lines that start with `` *`` have that marker stripped and are
        appended to the current doc's description; a word containing ``@``
        sets the doc's category. When a line contains ``*/`` the next source
        line is taken as the declaration: the doc is stored in ``self.docs``
        if that line contains ``;`` or ``{``, otherwise a warning is printed.
        A fresh doc is started after every ``*/``.
        """
        assert self.canParse, "FileScanner.py: Could not open %s." % (
            self.fileName)

        def fresh_doc():
            # Every doc starts out in the scanner's default category.
            created = Doc()
            created.category = self.categoryName
            return created

        doc = fresh_doc()
        for line in self.GetComments():

            if self.StrAtIndex(line, ' *', 0):
                line = line.replace(' * ', "").replace(' *', "")

                if '@' in line:
                    # The first word carrying an '@' names the category.
                    tagged = next(
                        (token for token in line.split(' ') if '@' in token),
                        None)
                    if tagged is not None:
                        doc.category = tagged.split('@')[-1].capitalize()

                # Sentences are concatenated into one long description.
                doc.description += line

                # Keep a separating space between consecutive sentences.
                if line and not line.endswith(' '):
                    doc.description += ' '

            elif not ('/*' in line or '*/' in line):
                print(
                    'FileScanner.py: Warning: Line %s of %s is missing a \' *\'; Line will be skipped.'
                    % (self.currentLine + 1, self.docFileName))

            if '*/' in line:
                # The comment must sit right before a declaration or definition.
                declaration = self.GetNextLine().strip()
                doc.declaration = declaration
                if ';' in declaration or '{' in declaration:
                    doc.declaration = declaration.replace(";", "").replace(
                        "{", "")
                    self.docs.append(doc)
                else:
                    print(
                        'FileScanner.py: Warning: Line %s of %s was skipped due to missing a declaration or definition.'
                        % (self.currentLine + 1, self.docFileName))

                doc = fresh_doc()
Exemplo n.º 5
0
class TestDoc(unittest.TestCase):
    """Unit tests for sentence and word parsing on ``Doc``."""

    def setUp(self):
        self.doc = Doc(0)

    def test_parse_sentences(self):
        """An empty body yields no sentences; one sentence yields one."""
        self.doc.body = ''
        self.assertEqual(self.doc.parse_sentences(), [])
        self.doc.body = 'I love you'
        # assertTrue(x, 1) treated 1 as the failure message and never
        # compared the length; assertEqual checks it.
        self.assertEqual(len(self.doc.parse_sentences()), 1)

    def test_parse_words(self):
        """Words come back lower-cased and in order."""
        self.doc.body = ''
        self.assertEqual(self.doc.parse_words(), [])
        self.doc.body = 'I love you'
        self.assertEqual(self.doc.parse_words(), ['i', 'love', 'you'])

    @unittest.skip('test')
    def test_text_file(self):
        """Load a sample file into the doc body (skipped; asserts nothing)."""
        with open('sampleText.txt', 'r') as inputfile:
            self.doc.body = inputfile.read()
Exemplo n.º 6
0
    def update(self, method):
        '''Register the entered file and clear the entry widgets.

           When ``method`` is "add", a document is created from the entered
           file, tagged with the current author/genre/year, appended to
           ``self.fileL`` and listed as one line in the text box. The four
           entry widgets are cleared in every case. '''
        if method == "add":
            document = d.Document(self.entered_file)
            document.author = self.author
            document.genre = self.genre
            document.year = self.year
            self.fileL.append(document)

            # Ten leading spaces, then the fields separated by five spaces.
            fields = (document.fileName, document.genre,
                      document.author, document.year)
            row = ' '*10 + (' '*5).join(str(field) for field in fields) + '\n'
            self.text.insert(INSERT, row)

        # Blank out every entry widget.
        for entry in (self.fileEntry, self.AuthorEnt,
                      self.YearEnt, self.GenreEnt):
            entry.delete(0,END)
Exemplo n.º 7
0
    def processCollection(self):
        """
        Process the documents collection of the system.

        Stores the document name list in ``self.docNames`` and its length in
        ``self.docNum``, then returns the result of ``Doc.processDocs`` for
        those names with the configured location and source type.
        """
        # Get the list of document names.
        self.docNames = self.getDocList()
        self.docNum = len(self.docNames)
        if DEBUG:
            # The function-call form prints the same text on Python 2 and is
            # required on Python 3.
            print('document list:')
            print(self.docNames)

        return Doc.processDocs(self.docNames,
                               self.docInfo['location'],
                               self.docInfo['srcType'])
Exemplo n.º 8
0
 def predict(self):
     '''Run the prediction method selected by ``self.statMethod``.

     A document is built from the entered file and tagged with the current
     genre and year. ``statMethod`` ('1' to '9') chooses which model is
     evaluated and how its input data is prepared. The first element of the
     prediction is stored in ``self.prediction``, printed, assigned as the
     document's author, and the document is appended to ``self.fileL`` and
     listed in the text box. The three prediction entry widgets are then
     cleared.

     Raises:
         ValueError: if ``self.statMethod`` is not one of '1' to '9'.
     '''
     file = d.Document(self.entered_file)
     file.genre = self.genre
     file.year = self.year
     if self.statMethod == '1':
         data = [[None, f.assignGenre(file)]]
         pred = self.skGenre.eval(data)
     elif self.statMethod == '2':
         data = [[None, f.assignYear(file)]]
         pred = self.skYear.eval(data)
     elif self.statMethod == '3':
         data = [[None, f.assignGenre(file)]]
         pred = self.idGenre.evaluate(data)[0]
     elif self.statMethod == '4':
         data = [[None, f.assignYear(file)]]
         pred = self.idYear.evaluate(data)[0]
     elif self.statMethod == '5':
         data = f.predData(file, self.skTop.labels)
         pred = self.skTop.eval(data)
     elif self.statMethod == '6':
         data = f.predData(file, self.skBottom.labels)
         pred = self.skBottom.eval(data)
     elif self.statMethod == '7':
         data = f.predData(file, self.idTop.columns)
         pred = self.idTop.evaluate(data)[0]
     elif self.statMethod == '8':
         data = f.predData(file, self.idBottom.columns)
         pred = self.idBottom.evaluate(data)[0]
     elif self.statMethod == '9':
         newfile = self.applyPredFilt(file)
         data = f.predPCA(newfile, self.pca.labels)
         # The PCA model returns an index into the already-known files.
         ind = self.pca.eval(data)
         pred = [self.fileL[ind].author]
     else:
         # Previously ``pred`` stayed unbound here and the method failed
         # later with an opaque UnboundLocalError.
         raise ValueError(
             'Unknown prediction method: ' + repr(self.statMethod))
     self.prediction = pred[0]
     print('The predicted author is: ' + str(self.prediction))
     file.author = pred[0]
     self.fileL.append(file)
     self.text.insert(
         INSERT, ' ' * 10 + str(file.fileName) + ' ' * 5 + str(file.genre) +
         ' ' * 5 + str(file.author) + ' ' * 5 + str(file.year) + '\n')
     self.predictfileEntry.delete(0, END)
     self.predictyearEntry.delete(0, END)
     self.predictgenreEntry.delete(0, END)
Exemplo n.º 9
0
    def update(self, method):
        """Plot the ten most frequent word counts of the entered file.

        ``method`` selects the action:

        * ``'reset'`` clears the remembered file, the label and the entry;
        * ``'bar'`` / ``'scatter'`` draw the corresponding plot and remember
          the entered file;
        * any other value computes the statistics but plots nothing.

        A ``ds.DocumentStreamError`` raised while reading the document is
        reported by printing its ``data``. The label and the entry are
        refreshed in every case.
        """
        if method == 'reset':
            self.file = ''
            self.file_label_text.set(self.file)
            self.entry.delete(0, END)
            return
        try:
            doc = d.Document(self.entered_file)
            doc.generateWhole()
            words = []

            for sent in doc.getSentences():
                text = sent.string
                # Drop one trailing non-letter (normally the closing
                # punctuation). An empty sentence has no last character.
                if text and not text[-1].isalpha():
                    text = text[:-1]
                words += [x.lower() for x in text.split()]
            stats = b.BasicStats()
            stats.dic = b.BasicStats.createFreqMap(words)
            top = stats.topN(10)

            # Only the counts are plotted, largest first.
            num = sorted((top[key] for key in top), reverse=True)

            if method in ("bar", "scatter"):
                plot = Plotter(num)
                if method == "bar":
                    plot.barGraph()
                else:
                    plot.scatterPlot()
                self.file = self.entered_file
        except ds.DocumentStreamError as E:
            print(E.data)

        self.file_label_text.set(self.file)
        self.entry.delete(0, END)
Exemplo n.º 10
0
class QDataViewer(QWidget):
    def __init__(self):
        """Create the initial window, which holds only the upload button.

        The language code defaults to 'ud' and can be overridden by the
        first command-line argument. The ten core columns are shown
        initially; ``map_col`` maps a column index to its name for the core
        columns followed by every feature column.
        """
        QWidget.__init__(self)

        core_columns = ["ID", "FORM", "LEMMA", "UPOS", "XPOS", "FEATS", "HEAD", "DEPREL", "DEPS", "MISC"]
        feature_columns = ["Abbr", "Animacy", "Aspect", "Case", "Clusivity", "Definite", "Degree", "Echo",
                           "Evident", "Foreign", "Gender", "Mood", "NounClass", "Number", "Number[psor]",
                           "NumType", "Person", "Person[psor]", "Polarity", "Polite", "Poss", "PronType",
                           "Reflex", "Register", "Tense", "VerbForm", "Voice"]

        # Layout Init.
        self.language = sys.argv[1] if len(sys.argv) > 1 else 'ud'
        self.setGeometry(650, 300, 600, 600)
        self.setWindowTitle('Data Viewer')
        self.uploadButton = QPushButton('Load Conll File', self)

        # Editing state.
        self.sentence_id = 0
        self.column_number = 10
        self.columns = list(core_columns)
        self.current_dict = {}
        self.load_finished = True
        self.first_time = True
        self.session_start = True
        # Index -> column name: 0-9 are the core columns, 10-36 the features.
        self.map_col = dict(enumerate(core_columns + feature_columns))

        self.doc = None

        self.vBoxLayout = QVBoxLayout()
        self.vBoxLayout.addWidget(self.uploadButton)
        self.setLayout(self.vBoxLayout)

        # Signal Init.
        self.connect(self.uploadButton, QtCore.SIGNAL('clicked()'), self.open)

    def open(self):
        """Ask the user for a file, load it into a ``Doc`` and build the UI.

        The notes file name is derived from the chosen file's base name
        (``notes-<name>.txt``) and the file is created empty when it does
        not exist yet. If the dialog is cancelled nothing happens, so the
        upload button stays available for another attempt.
        """
        filename = QFileDialog.getOpenFileName(self, 'Open File', '.')[0]
        if not filename:
            # Cancelled dialog: an empty path must not reach Doc().
            return

        self.notename = "notes-"+filename.split("/")[-1].split(".")[0]+".txt"
        self.uploadButton.hide()
        print(filename)
        self.doc = Doc(filename)

        if not os.path.exists(self.notename):
            # Inside a method body ``open`` resolves to the builtin, not to
            # this method.
            open(self.notename, "w").close()

        self.construct()

    def writeNotes(self):
        """Store the note box content for the current sentence and save all notes.

        Nothing happens while the box still shows the placeholder text. An
        empty box deletes the note of the current sentence; any other text
        is stored with line breaks flattened to spaces. The whole note
        dictionary is then rewritten to ``self.notename``, one
        ``<id> --- <note>`` line per entry, ordered by the string key.
        """
        note_text = self.qTextEdit2.toPlainText()
        if note_text == "Write your note here...":
            return

        key = str(self.sentence_id)
        if note_text == "":
            self.noteDictionary.pop(key, None)
        else:
            self.noteDictionary[key] = note_text.rstrip().replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

        # The context manager closes the file even if a write fails.
        with open(self.notename, "w") as note_file:
            for note_key in sorted(self.noteDictionary.keys()):
                note_file.write(note_key+" --- "+self.noteDictionary[note_key]+"\n")

    def go_prev(self):
        """Save the current note and move to the previous sentence.

        The index is only decremented while it is greater than 0. The
        table, the HTML view and the error panel are refreshed and the
        sentence-id box is updated.
        """
        # first_time is raised while the widgets are refreshed; handle_change
        # skips its write-back while the flag is set.
        self.first_time = True
        self.writeNotes()

        if self.sentence_id>0:
            self.sentence_id-=1   
        self.update_table()
        # session_start makes update_html take a fresh backup of the sentence.
        self.session_start = True
        self.update_html()
        self.check_errors()

        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False
    
    def go_next(self):
        """Save the current note and move to the next sentence.

        The index is only incremented while it is below the last sentence
        index. The table, the HTML view and the error panel are refreshed
        and the sentence-id box is updated.
        """
        # first_time is raised while the widgets are refreshed; handle_change
        # skips its write-back while the flag is set.
        self.first_time = True
        self.writeNotes()

        if self.sentence_id<len(self.doc.sentences)-1:
            self.sentence_id+=1
        self.update_table()
        # session_start makes update_html take a fresh backup of the sentence.
        self.session_start = True
        self.update_html()
        self.check_errors()

        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False
    
    def go(self):
        """Write the document and jump to the sentence id typed in the id box.

        The document and the current note are saved first. The requested id
        must be an integer in ``0 .. len(sentences) - 1``; otherwise the
        problem is printed and the current sentence stays selected. The id
        box is rewritten with the sentence id that is actually shown.
        """
        self.doc.write()
        self.first_time = True
        self.writeNotes()

        try:
            requested = int(self.qTextEdit.toPlainText())
            # Validate before touching any state: previously an out-of-range
            # id was stored in sentence_id and the note box was overwritten
            # before the lookup failed.
            if not 0 <= requested < len(self.doc.sentences):
                raise IndexError("sentence id out of range: %d" % requested)
            self.sentence_id = requested
            self.update_table()
            self.session_start = True
            self.update_html()
            self.check_errors()
        except Exception as e:
            # Best effort: report the problem and keep the UI running.
            print(e)
        
        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False


    def reset(self):
        """Restore the current sentence from its backup and refresh the views.

        Does nothing while ``first_time`` is set. Otherwise the backup is
        copied back into both ``self.sentence`` and the document, the
        document is written, and the table, the HTML view and the error
        panel are refreshed.
        """
        if self.first_time:
            return

        self.first_time = True
        backup = self.sentence_backup
        # Two independent copies, so the backup itself is never edited.
        self.sentence = copy.deepcopy(backup)
        self.doc.sentences[self.sentence_id] = copy.deepcopy(backup)
        self.session_start = True
        self.doc.write()
        for refresh in (self.update_table, self.update_html, self.check_errors):
            refresh()

        self.first_time = False


    def construct(self):
        """Build the editor widgets after a document has been loaded.

        Creates the toolbar (prev/reset/add-row/delete-row/go/note/next),
        three rows of column check boxes, loads the saved notes from
        ``self.notename``, wires the button signals, and creates the table,
        the error panel and the web view, which are then filled for the
        current sentence.
        """
        self.hBoxLayout = QHBoxLayout()
        self.prevButton = QPushButton("Prev", self)
        self.prevButton.setShortcut("Alt+O")

        self.resetButton = QPushButton("Reset", self)
        self.resetButton.setShortcut("Alt+R")

        self.qTextEditAddRow = QTextEdit()
        self.qTextEditAddRow.setFixedHeight(20)
        self.qTextEditAddRow.setFixedWidth(60)

        self.qTextEditDeleteRow = QTextEdit()
        self.qTextEditDeleteRow.setFixedHeight(20)
        self.qTextEditDeleteRow.setFixedWidth(60)

        self.qTextEdit = QTextEdit()
        self.qTextEdit.setFixedHeight(20)
        self.qTextEdit.setFixedWidth(60)

        self.qTextEdit2 = QTextEdit()
        self.qTextEdit2.setFixedHeight(20)
        self.qTextEdit2.setFixedWidth(500)

        self.shortcutText=QShortcut(QtGui.QKeySequence("Alt+M"), self)
        self.shortcutText.activated.connect(self.qTextEdit2.setFocus)

        self.goButton = QPushButton("Go", self)
        self.goButton.setShortcut("Alt+G")
        self.nextButton = QPushButton("Next", self)
        self.nextButton.setShortcut("Alt+P")
        self.addRowButton = QPushButton("Add Row", self)
        self.deleteRowButton = QPushButton("Delete Row", self)
        self.hBoxLayout.addWidget(self.prevButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.resetButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEditAddRow)
        self.hBoxLayout.addWidget(self.addRowButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEditDeleteRow)
        self.hBoxLayout.addWidget(self.deleteRowButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEdit)
        self.hBoxLayout.addWidget(self.goButton)

        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEdit2)

        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.nextButton)
        self.vBoxLayout.addLayout(self.hBoxLayout)

        cb_ids = ["ID", "FORM", "LEMMA", "UPOS", "XPOS", "FEATS", "HEAD", "DEPREL", "DEPS", "MISC"]
        cb_ids2 = ["Abbr", "Animacy", "Aspect", "Case", "Clusivity", "Definite", "Degree", "Echo", "Evident", "Foreign", "Gender", "Mood", "NounClass", "Number"]
        cb_ids3 = ["Number[psor]", "NumType", "Person", "Person[psor]", "Polarity", "Polite", "Poss", "PronType", "Reflex", "Register", "Tense", "VerbForm", "Voice"]

        def make_checkbox_row(labels, checked):
            # One centred row of column check boxes. The signal is connected
            # only after the initial state is set, so building the row never
            # triggers cb_change.
            row = QHBoxLayout()
            row.addStretch()
            for label in labels:
                cb = QCheckBox(label)
                cb.setChecked(checked)
                cb.stateChanged.connect(self.cb_change)
                row.addWidget(cb)
            row.addStretch()
            self.vBoxLayout.addLayout(row)
            return row

        # Core columns start visible, feature columns start hidden.
        self.chBoxLayout = make_checkbox_row(cb_ids, True)
        self.chBoxLayout_2 = make_checkbox_row(cb_ids2, False)
        self.chBoxLayout_3 = make_checkbox_row(cb_ids3, False)

        self.qTextEdit.setText(str(self.sentence_id))

        # Load the saved notes; each line has the form "<id> --- <note>".
        self.noteDictionary = {}
        with open(self.notename, "r") as noteFile:
            for note in noteFile:
                # Split on the first separator only, so a note that itself
                # contains " --- " is kept whole.
                noteID, separator, noteContent = note.partition(" --- ")
                if not separator:
                    # Blank or malformed line: nothing to load.
                    continue
                self.noteDictionary[noteID] = noteContent.rstrip()

        self.connect(self.prevButton, QtCore.SIGNAL('clicked()'), self.go_prev)
        self.connect(self.resetButton, QtCore.SIGNAL('clicked()'), self.reset)
        self.connect(self.goButton, QtCore.SIGNAL('clicked()'), self.go)
        self.connect(self.nextButton, QtCore.SIGNAL('clicked()'), self.go_next)
        self.connect(self.addRowButton, QtCore.SIGNAL('clicked()'), self.add_row)
        self.connect(self.deleteRowButton, QtCore.SIGNAL('clicked()'), self.delete_row)

        # create table here
        self.tableWidget = QTableWidget(self)

        self.tableWidget.itemChanged.connect(self.handle_change)

        # Clicking a row header merges or splits that row (see agg).
        self.connect(self.tableWidget.verticalHeader(), QtCore.SIGNAL("sectionClicked(int)"), self.agg)

        self.qTextEditError = QTextEdit()
        self.qTextEditError.setReadOnly(True)

        self.splitter = QSplitter(Qt.Vertical)
        self.splitter.addWidget(self.tableWidget)
        self.splitter.addWidget(self.qTextEditError)
        self.vBoxLayout.addWidget(self.splitter)

        self.webView = QWebEngineView()

        self.update_table()
        self.update_html()
        self.check_errors()

        self.splitter2 = QSplitter(Qt.Vertical)
        self.splitter2.addWidget(self.splitter)
        self.splitter2.addWidget(self.webView)
        self.vBoxLayout.addWidget(self.splitter2)

        self.webView.loadFinished.connect(self.finito)

        self.first_time = False
    
    def finito(self):
        """Slot for the web view's ``loadFinished`` signal: mark the page as loaded."""
        self.load_finished = True

    def add_row(self):
        """Insert a new word row in front of the word whose ID is in the add-row box.

        Input containing "-" is ignored. The insertion is refused when the
        ID equals either end of a range word (``unitword``). Otherwise every
        word with that ID or a later one is shifted up by one, heads greater
        than or equal to the ID are incremented, and a new word carrying the
        same form and (shifted) head, with "_" in the other fields, is
        placed before the word that had the ID. The table and the HTML view
        are refreshed afterwards.
        """

        if "-" not in self.qTextEditAddRow.toPlainText():

            # NOTE(review): int() raises ValueError on empty or non-numeric input.
            word_id = int(self.qTextEditAddRow.toPlainText())
            possible_move = True
            new_sentence_words = []

            # Refuse to insert at either end of a range word ("a-b" ID).
            for word in self.sentence.words:
                if word.unitword:
                    x1 = int(word.id.split("-")[0])
                    x2 = int(word.id.split("-")[1])
                    if word_id == x1 or word_id == x2:
                        possible_move = False

            if possible_move:
                for word in self.sentence.words:
                    new_word = copy.deepcopy(word)

                    # Heads pointing at or past the insertion point move up by one.
                    if new_word.head != "_" and int(new_word.head) >= word_id:
                        new_word.head = str(int(new_word.head) + 1)

                    # A range word is positioned by the first ID of its range.
                    if new_word.unitword:
                        new_word_id = int(new_word.id.split("-")[0])
                    else:
                        new_word_id = int(new_word.id)

                    if new_word_id < word_id:
                        new_sentence_words.append(new_word)
                    elif new_word_id == word_id:
                        # Build the inserted word, then shift the existing one.
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            w = Word("\t".join(
                                [str(x1), new_word.form, "_", "_", "_", "_", new_word.head, "_", "_", "_"]), self.sentence.sent_address)
                            new_word.id = str(x1 + 1) + "-" + str(x2 + 1)
                        else:
                            w = Word("\t".join(
                                [new_word.id, new_word.form, "_", "_", "_", "_", new_word.head, "_", "_", "_"]), self.sentence.sent_address)
                            new_word.id = str(int(new_word.id) + 1)
                        new_sentence_words.append(w)
                        new_sentence_words.append(new_word)
                    elif new_word_id > word_id:
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            new_word.id = str(x1 + 1) + "-" + str(x2 + 1)
                        else:
                            new_word.id = str(int(new_word.id) + 1)
                        new_sentence_words.append(new_word)

                self.sentence.words = copy.deepcopy(new_sentence_words)
                # first_time is raised while the views are refreshed.
                self.first_time = True
                self.update_table()
                self.update_html()
                self.first_time = False

    def delete_row(self):
        """Delete the word whose ID is typed in the delete-row box.

        Input containing "-" is ignored. The deletion is refused when the
        ID equals either end of a range word (``unitword``) or when any word
        has that ID as its head. Otherwise the word is dropped, later IDs
        are shifted down by one and heads greater than or equal to the ID
        are decremented. The table and the HTML view are refreshed
        afterwards.
        """

        if "-" not in self.qTextEditDeleteRow.toPlainText():

            # NOTE(review): int() raises ValueError on empty or non-numeric input.
            word_id = int(self.qTextEditDeleteRow.toPlainText())
            possible_move = True
            new_sentence_words = []

            for word in self.sentence.words:
                # Refuse to delete either end of a range word ("a-b" ID).
                if word.unitword:
                    x1 = int(word.id.split("-")[0])
                    x2 = int(word.id.split("-")[1])
                    if word_id == x1 or word_id == x2:
                        possible_move = False
                # Refuse to delete a word that another word depends on.
                if not word.head == "_":
                    if int(word.head) == word_id:
                        possible_move = False

            if possible_move:
                for word in self.sentence.words:
                    new_word = copy.deepcopy(word)

                    # Heads past the deleted word move down by one.
                    if new_word.head != "_" and int(new_word.head) >= word_id:
                        new_word.head = str(int(new_word.head) - 1)

                    # A range word is positioned by the first ID of its range.
                    if new_word.unitword:
                        new_word_id = int(new_word.id.split("-")[0])
                    else:
                        new_word_id = int(new_word.id)

                    # A word whose ID equals word_id matches neither branch
                    # and is therefore dropped.
                    if new_word_id < word_id:
                        new_sentence_words.append(new_word)
                    elif new_word_id > word_id:
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            new_word.id = str(x1 - 1) + "-" + str(x2 - 1)
                        else:
                            new_word.id = str(int(new_word.id) - 1)
                        new_sentence_words.append(new_word)

                self.sentence.words = copy.deepcopy(new_sentence_words)
                # first_time is raised while the views are refreshed.
                self.first_time = True
                self.update_table()
                self.update_html()
                self.first_time = False

    def agg(self, x):
        """Merge or split the word in table row ``x``.

        If the row is a range word (``unitword``), it is merged: the fields
        of the following row are copied into it, it gets the first ID of its
        range, the two following rows are deleted, and later IDs and heads
        are shifted down by one. Otherwise the row is split: later IDs and
        heads are shifted up by one, two copies of the word are inserted
        below it, and the row itself becomes the range word
        ``<id>-<id+1>`` with "_" in its other fields. The table and the HTML
        view are refreshed in both cases.
        """
        
        if self.sentence.words[x].unitword:#remove two-words thing into one
            limit = int(self.sentence.words[x].id.split("-")[0])
            self.sentence.words[x].head = self.sentence.words[x+1].head
            self.sentence.words[x].lemma = self.sentence.words[x+1].lemma
            self.sentence.words[x].upos = self.sentence.words[x+1].upos
            self.sentence.words[x].xpos = self.sentence.words[x+1].xpos
            self.sentence.words[x].feats = self.sentence.words[x+1].feats
            self.sentence.words[x].deprel = self.sentence.words[x+1].deprel
            self.sentence.words[x].deps = self.sentence.words[x+1].deps
            self.sentence.words[x].misc = self.sentence.words[x+1].misc
            self.sentence.words[x].id = str(limit)
            self.sentence.words[x].unitword = False
            # Remove the two rows that followed the range word.
            # NOTE(review): assumes every range spans exactly two words — confirm.
            del self.sentence.words[x+1]
            del self.sentence.words[x+1]

            # One ID disappeared, so everything after `limit` moves down by one.
            for word in self.sentence.words:
                if word.unitword:
                    first_word_id = int(word.id.split("-")[0])
                    if first_word_id>limit:
                        word.id = str(first_word_id-1)+"-"+str(first_word_id)
                else:
                    if int(word.id) > limit:
                        word.id = str(int(word.id)-1)
                
                if word.head != "_" and int(word.head) > limit:
                    word.head = str(int(word.head)-1)
            self.first_time = True
            self.update_table()
            self.update_html()
            self.first_time = False
        
        else:#add two-elements below
            base_word = self.sentence.words[x]
            limit = int(base_word.id)

            # One ID is added, so everything after `limit` moves up by one.
            for word in self.sentence.words:
                if word.unitword:
                    first_word_id = int(word.id.split("-")[0])
                    if first_word_id>limit:
                        word.id = str(first_word_id+1)+"-"+str(first_word_id+2)
                else:
                    if int(word.id) > limit:
                        word.id = str(int(word.id)+1)
                
                if word.head != "_" and int(word.head) > limit:
                    word.head = str(int(word.head)+1)


            # w1 keeps the original head; w2 is attached to w1 (head = limit).
            w1 = Word("\t".join([str(limit), base_word.form, base_word.lemma, base_word.upos, base_word.xpos, base_word.feats, base_word.head, base_word.deprel, base_word.deps, "_"]), self.sentence.sent_address)
            w2 = Word("\t".join([str(limit+1), base_word.form, base_word.lemma, base_word.upos, base_word.xpos, base_word.feats, str(limit), base_word.deprel, base_word.deps, "_"]), self.sentence.sent_address)
            self.sentence.words = self.sentence.words[:x+1]+[w1, w2]+self.sentence.words[x+1:]
            # The clicked row becomes the range word covering w1 and w2.
            base_word.id = str(limit)+"-"+str(limit+1)
            base_word.lemma = "_"
            base_word.upos = "_"
            base_word.xpos = "_"
            base_word.feats = "_"
            base_word.head = "_"
            base_word.deprel = "_"
            base_word.deps = "_"
            base_word.unitword = True
            self.first_time = True
            self.update_table()
            self.update_html()
            self.first_time = False

    
    def update_table(self):
        """Fill the note box and the table from the current sentence.

        The note box shows the stored note for the sentence or the
        placeholder text. The table gets one row per word and one column per
        entry in ``self.columns``: core columns show the matching word
        attribute, any other column shows the value of that feature parsed
        from ``word.feats`` ("_" when the word lacks it). The vertical
        header shows "-" for range words and "+" for ordinary words.
        """
        note_key = str(self.sentence_id)
        if note_key in self.noteDictionary:
            self.qTextEdit2.setText(self.noteDictionary[note_key])
        else:
            self.qTextEdit2.setText("Write your note here...")

        self.sentence = self.doc.sentences[self.sentence_id]

        self.tableWidget.setRowCount(len(self.sentence.words))
        self.tableWidget.setColumnCount(self.column_number)
        self.tableWidget.setHorizontalHeaderLabels(self.columns)

        # Core column name -> attribute of the word object.
        core_attrs = {
            "ID": "id", "FORM": "form", "LEMMA": "lemma", "UPOS": "upos",
            "XPOS": "xpos", "FEATS": "feats", "HEAD": "head",
            "DEPREL": "deprel", "DEPS": "deps", "MISC": "misc",
        }

        for enum, word in enumerate(self.sentence.words):
            marker = "-" if word.unitword else "+"
            self.tableWidget.setVerticalHeaderItem(enum, QTableWidgetItem(marker))

            # Parse "Name=Value|Name=Value" into a dict. Plain string
            # splitting replaces the regexes with invalid escapes, and
            # partition tolerates an entry that has no "=".
            dict_feat = {}
            uni_feats = word.feats.split("|")
            if uni_feats[0] != "_":
                for uni_feat in uni_feats:
                    feat_name, _, feat_value = uni_feat.partition("=")
                    dict_feat[feat_name] = feat_value

            for i in range(self.column_number):
                column = self.columns[i]
                if column in core_attrs:
                    value = getattr(word, core_attrs[column])
                else:
                    value = dict_feat.get(column, "_")
                self.tableWidget.setItem(enum, i, QTableWidgetItem(value))

        self.tableWidget.resizeColumnsToContents()

    def check_errors(self):
        """Show the word index line and any validation errors in the error panel.

        The first line lists every word as ``form(id)``. When ``get_errors``
        reports errors for the sentence, they follow under an ``ERRORS:``
        heading, one per line.
        """
        words_line = "".join(
            w.form + "(" + w.id + ") " for w in self.sentence.words) + "\n"
        error_list = get_errors(self.sentence.get_raw(), self.sentence.sent_id, self.language)

        if len(error_list) == 0:
            self.qTextEditError.setText(words_line)
            return

        report = 'ERRORS:\n' + "".join(error + '\n' for error in error_list)
        self.qTextEditError.setText(words_line + report)
    def update_html(self):
        """Render the current sentence into the web view.

        If the previous page never reported ``loadFinished``, the web view
        and its splitter are replaced by fresh ones first. When
        ``session_start`` is set, a backup copy of the sentence is taken for
        ``reset``. ``load_finished`` is cleared until ``finito`` sets it
        again.
        """
        if not self.load_finished:
            # The last page did not finish loading: rebuild the browser pane.
            print("Load error!")
            self.splitter2.deleteLater()
            self.webView = QWebEngineView()
            self.splitter2 = QSplitter(Qt.Vertical)
            for pane in (self.splitter, self.webView):
                self.splitter2.addWidget(pane)
            self.vBoxLayout.addWidget(self.splitter2)
            self.webView.loadFinished.connect(self.finito)

        current = self.doc.sentences[self.sentence_id]
        self.sentence = current
        if self.session_start:
            self.sentence_backup = copy.deepcopy(current)
            self.session_start = False

        self.webView.setHtml(process_document(current))
        self.load_finished = False


    def cb_change(self):
        """Rebuild the visible column list from the checked boxes and redraw.

        The three check-box rows are scanned in order; every checked box
        contributes its label to ``self.columns``. ``self.column_number``
        and the index-to-name map ``self.map_col`` are derived from that
        list, then the table is refreshed.
        """
        checked_labels = []
        for layout in (self.chBoxLayout, self.chBoxLayout_2, self.chBoxLayout_3):
            for position in range(layout.count()):
                entry = layout.itemAt(position)
                # Stretch items are not widget items and are skipped.
                if not isinstance(entry, QWidgetItem):
                    continue
                box = entry.widget()
                if box.isChecked():
                    checked_labels.append(box.text())

        self.columns = checked_labels
        self.column_number = len(checked_labels)
        self.map_col = dict(enumerate(checked_labels))

        self.first_time = True
        self.update_table()
        self.first_time = False
                

    def handle_change(self, item):
        """Write an edited table cell back into the current sentence.

        ``item`` is the changed table item; its column index is mapped to a
        column name through ``self.map_col``. An empty cell is stored as
        "_". Core columns are written to the matching word attribute (UPOS
        is upper-cased). Any other column is treated as one feature inside
        ``word.feats``:

        * an empty cell removes the feature;
        * an existing feature gets its value replaced;
        * a new feature is inserted after the last feature whose name sorts
          before it (case-insensitive), or first if there is none;
        * "_" for a feature the word does not have changes nothing.

        Unless ``first_time`` is set (programmatic refresh), the document
        and the notes are saved and all views are refreshed.
        """
        col = self.map_col[item.column()]
        text = item.text()
        cleared = text == ""
        if cleared:
            text = "_"
        row = item.row()
        self.sentence = self.doc.sentences[self.sentence_id]
        word = self.sentence.words[row]

        # Core column name -> attribute of the word object.
        core_attrs = {
            "ID": "id", "FORM": "form", "LEMMA": "lemma", "UPOS": "upos",
            "XPOS": "xpos", "FEATS": "feats", "HEAD": "head",
            "DEPREL": "deprel", "DEPS": "deps", "MISC": "misc",
        }

        if col in core_attrs:
            setattr(word, core_attrs[col], text.upper() if col == "UPOS" else text)
        else:
            # Work on the parsed feature list instead of regex substitution:
            # values may contain characters outside \w (e.g. "Acc,Dat") and
            # names may contain brackets (e.g. "Number[psor]").
            def feature_name(entry):
                return entry.partition("=")[0]

            entries = [] if word.feats == "_" else word.feats.split("|")

            if any(feature_name(entry) == col for entry in entries):
                if cleared:
                    entries = [entry for entry in entries
                               if feature_name(entry) != col]
                else:
                    entries = [col + "=" + text if feature_name(entry) == col else entry
                               for entry in entries]
            elif text != "_":
                insert_at = 0
                for position, entry in enumerate(entries):
                    if feature_name(entry).lower() < col.lower():
                        insert_at = position + 1
                entries.insert(insert_at, col + "=" + text)

            # An empty feature list is written as "_", never as "".
            word.feats = "|".join(entries) or "_"

        if not self.first_time:
            self.doc.write()
            # Raise the flag so the refresh below does not re-enter this block.
            self.first_time = True
            self.writeNotes()

            self.update_table()
            self.update_html()
            self.check_errors()

            self.first_time = False
Exemplo n.º 11
0
import Doc
from Trimmer import trim

# Call the function once with sample arguments, then print its docstring
# after running it through trim().
Doc.funkcja(1, 2, 3)

raw_docstring = Doc.funkcja.__doc__
print(trim(raw_docstring))
Exemplo n.º 12
0
def main():
    """Prompt for a file and time two ways of finding its 100 most frequent words.

    The file name is read from standard input and loaded through
    ``d.Document``. Every sentence is split on whitespace into lowercased
    words; a single trailing non-alphabetic character (typically the closing
    punctuation mark) is dropped from each sentence first.

    The word list is then fed to ``b.BasicStats`` twice: once followed by
    ``topN(100)`` and once followed by ``topNHeap(100)``. The elapsed time of
    each run is printed in microseconds.

    A ``ds.DocumentStreamError`` raised anywhere in this process is caught and
    its ``data`` attribute is printed instead of propagating.
    """
    file = input('Please enter a file: ')
    try:
        # Convert the file into a document object.
        doc = d.Document(file)
        doc.generateWhole()

        # Flatten the document into one lowercased word list so that
        # differently-cased occurrences are counted as the same word.
        words = []
        for sent in doc.getSentences():
            text = sent.string
            # Guard on emptiness first: indexing [-1] on an empty sentence
            # string would raise IndexError.
            if text and not text[-1].isalpha():
                text = text[:-1]
            words.extend(token.lower() for token in text.split())

        stats = b.BasicStats()

        # perf_counter() is monotonic and high-resolution, which makes it the
        # appropriate clock for measuring short intervals. It returns seconds,
        # so the difference is scaled to match the "us." label in the output.
        start1 = time.perf_counter()
        stats.dic = b.BasicStats.createFreqMap(words)
        stats.topN(100)
        end1 = time.perf_counter()
        print('Dictionary: {0:.5f} us.'.format((end1 - start1) * 1e6))
        # Author's analysis (not verified against BasicStats here): the run
        # time is n + 2m, where n is the number of words in the document and
        # m is the length of the dictionary. createFreqMap has a run time of
        # n and topN has a run time of 2m. m <= n, so n + 2m <= 3n.
        # Therefore, this is O(n).

        start2 = time.perf_counter()
        stats.dic = b.BasicStats.createFreqMap(words)
        stats.topNHeap(100)
        end2 = time.perf_counter()
        print('Heap: {0:.5f} us.'.format((end2 - start2) * 1e6))
        # Author's analysis (not verified against BasicStats here): the run
        # time is n + 2m, where n is the number of words in the document and
        # m is the length of the dictionary. createFreqMap has a run time of
        # n and topNHeap has a run time of 2m. m <= n, so n + 2m <= 3n.
        # Therefore, this is O(n). On average, the heap would still be
        # expected to perform faster than the dictionary.

    # Handles any exceptions that might occur from the DocumentStream class.
    except ds.DocumentStreamError as err:
        print(err.data)
Exemplo n.º 13
0
 def setUp(self):
     """Attach a freshly built ``Doc(0)`` to this instance as ``self.doc``."""
     fixture = Doc(0)
     self.doc = fixture
Exemplo n.º 14
0
    elif accion == "2":             # create a file
        # A file is always stored inside a folder, so nothing can be created
        # while `menu` (the collection of folders) is empty.
        if int(len(menu)) == 0:
           print("\nAún no hay carpetas registradas")
        else:
            # Name and size are kept exactly as typed; the size is not
            # converted to a number in this branch.
            nombre = input("Nombre del archivo que desea crear: ")
            tamaño = input("Tamaño del archivo que desea crear: ")
            tipo = input('''Tipo de archivo: 
    1- .DOC
    2- .MOV
    3- .WAV
    4- .PDF
    ==> ''')
            # Keep asking until one of the four menu options is entered.
            while not (tipo == "1" or tipo == "2" or tipo == "3" or tipo == "4"):
                tipo = input("Ingreso inválido, seleccione un tipo de archivo correcto: ")
            # Build the object matching the chosen type; the final `else`
            # can only be option "4" because of the validation loop above.
            if tipo == "1":
                documento = Doc(nombre, "DOC", tamaño)
            elif tipo == "2":
                documento = Mov(nombre, "MOV", tamaño)
            elif tipo == "3":
                documento = Wav(nombre, "WAV", tamaño)
            else:
                documento = Pdf(nombre, "PDF", tamaño)

            # Ask for the destination folder and repeat until
            # carpeta_existente() returns a truthy value.
            # NOTE(review): presumably it reports whether `menu` contains a
            # folder with that name -- confirm against its definition.
            carpeta = input(f"\nIngrese el nombre de la carpeta donde desea guardar {documento.nombre}: ")
            bolean = carpeta_existente(menu, carpeta)
            while not bolean:
                carpeta = input(f"\nCarpeta no existente. Ingrese el nombre de la carpeta donde desea guardar {documento.nombre}: ")
                bolean = carpeta_existente(menu, carpeta)
            # Append the new file to every folder whose name matches; the loop
            # does not stop at the first match.
            for i in menu:
                if i.nombre == carpeta:
                    i.lista_archivos.append(documento)
Exemplo n.º 15
0
#!python3

import os
import re

import Doc

# One dictionary is shared by both calls: it is handed to the source search
# first and then to the target replacement.
# NOTE(review): presumably SearchSourceSnippets fills the dictionary in place
# -- confirm in the Doc module.
snippets = {}
Doc.SearchSourceSnippets(
    r"D:\GitHub\ezEngine\Code\Games\SampleGamePlugin",
    snippets,
)

Doc.ReplaceTargetSnippets(
    "./docs",
    snippets,
)
Exemplo n.º 16
0
    def dump_top(self, top):
        """Return the first ``top`` histogram records, one per line.

        The histogram is re-sorted with ``sort_by_counter()`` before slicing;
        each selected record contributes its ``print_entity()`` text and the
        pieces are joined with newlines.
        """
        self.sort_by_counter()
        leading_records = self.histogram[:top]
        lines = [entry.print_entity() for entry in leading_records]
        return '\n'.join(lines)

    def dump(self):
        """Return every histogram record's ``print_entity()`` text, one per line.

        Records are emitted in the histogram's current order; no sorting is
        performed here.
        """
        lines = [entry.print_entity() for entry in self.histogram]
        return '\n'.join(lines)

# Command-line arguments:
#   (0 - program name)
#   1 - input file
#   2 - output file
#   3 - stopwords (optional)
if __name__ == "__main__":
    # The input and output paths are both required. Without this check a
    # short command line fails later with a bare IndexError on sys.argv.
    if len(sys.argv) < 3:
        sys.exit("usage: <program> <input file> <output file> [stopwords]")

    # The stopwords argument is only used when exactly three arguments are
    # supplied; any other count falls back to the default histogram.
    if len(sys.argv) == 4:
        histogram = WordsHistogram(stopwords=sys.argv[3])
    else:
        histogram = WordsHistogram()

    # Load the whole input file as the body of a single document and feed
    # its words into the histogram.
    with open(sys.argv[1], 'r') as inputfile:
        doc = Doc(0)
        doc.body = inputfile.read()
        histogram.accumulate(doc.parse_words())
    histogram.rank(5)
    histogram.sort_by_counter()

    # Write the full histogram dump to the output file.
    with open(sys.argv[2], 'w') as outputfile:
        outputfile.write(histogram.dump())
Exemplo n.º 17
0
 def docNew(self):
     """Build and return a new ``Doc``, passing this object as its only argument."""
     new_doc = Doc(self)
     return new_doc
Exemplo n.º 18
0
#!python3

import os
import re

import Doc

# One dictionary is shared by both calls: it is handed to the source search
# first and then to the target replacement.
# NOTE(review): presumably SearchSourceSnippets fills the dictionary in place
# -- confirm in the Doc module.
snippets = {}
Doc.SearchSourceSnippets(
    r"D:\GitHub\ezEngine\Code",
    snippets,
)

Doc.ReplaceTargetSnippets(
    "./docs",
    snippets,
)