def read_file(url, doc_ID, index):
    """Parse the file at *url* and hand its text fields to ``tokenizer``.

    Despite the parameter name, *url* is opened as a local file path.

    Args:
        url: Path of the XML document to read.
        doc_ID: Identifier stored on the new ``Doc`` as ``doc_ID``.
        index: Passed through unchanged to ``tokenizer``.

    Raises:
        IndexError: If the document has no ``<p>``, keywords ``<meta>`` or
            ``<title>`` element.
    """
    doc = Doc()
    doc.doc_ID = doc_ID

    # Parse from the open handle (file objects have no ``.content``) and
    # close it as soon as parsing is done.
    with open(url, 'r') as file:
        page = BeautifulSoup(file, 'xml')

    content = list()
    content.append(page.find_all('p')[0].get_text())

    # ``name`` is find_all's first positional parameter (the tag name), so
    # filtering on the *attribute* called "name" has to go through ``attrs``.
    keywords = page.find_all('meta', attrs={'name': 'keywords'})[0]
    # A <meta> tag carries its value in the ``content`` attribute; fall back
    # to the element text for documents that store it there instead.
    content.append(keywords.get('content') or keywords.get_text())

    content.append(page.find_all('title')[0].get_text())
    tokenizer(content, doc, index)
def open(self):
    """Ask the user for a file, load it into a ``Doc`` and build the UI.

    The notes file name is derived from the chosen file's base name
    (``notes-<name>.txt``) and the file is created empty if it does not
    exist yet. Nothing happens when the dialog returns an empty path.
    """
    filename = QFileDialog.getOpenFileName(self, 'Open File', '.')[0]
    if not filename:
        # Empty path: no file was chosen. Keep the upload button visible
        # instead of hiding it and constructing Doc('').
        return
    self.notename = "notes-"+filename.split("/")[-1].split(".")[0]+".txt"
    self.uploadButton.hide()
    print(filename)
    self.doc = Doc(filename)
    if not os.path.exists(self.notename):
        # NOTE(review): relies on this method living in a class body, where
        # the bare name ``open`` still resolves to the builtin.
        with open(self.notename, "w"):
            pass
    self.construct()
def buscar(self, dni_, opc):
    """Look up one row of ``abonados`` by id and dispatch on *opc*.

    Args:
        dni_: Id value to search for; it is converted with ``str``.
        opc: Operation selector. ``1`` calls ``buscar.busqueda``, ``3`` calls
            ``Doc.tool``, and any other value calls ``borrar.eliminar``.

    When no row matches, a message is printed and ``menu.principal()`` and
    ``dnierror.adv()`` are called instead.
    """
    # NOTE(review): the query is assembled by string concatenation, which is
    # open to SQL injection. Switch to a parameterised query once the
    # driver's placeholder style ("?" vs "%s") is confirmed.
    self.cursor.execute(
        "select * from abonados where id='" + str(dni_) + "'")
    # Fetch the matching row, if any.
    resultado = self.cursor.fetchone()
    if resultado is None:
        print("DNI no encontrado")
        menu.principal()
        dnierror.adv()
        return

    # Exactly one action per call. The original used two independent ``if``
    # statements, so opc == 1 fell through into the ``else`` branch and also
    # called borrar.eliminar.
    if opc == 1:
        buscar.busqueda(resultado[0], resultado[1], resultado[2],
                        resultado[3], resultado[4], resultado[5])
    elif opc == 3:
        Doc.tool(resultado[0], resultado[1], resultado[2],
                 resultado[3], resultado[4], resultado[5])
    else:
        borrar.eliminar(resultado[0], resultado[1], resultado[2])
def Parse(self):
    """Build ``Doc`` entries from the comment lines of the scanned file.

    Walks the lines yielded by ``self.GetComments()``, accumulating text into
    ``doc.description`` and picking up an ``@category`` override. When a line
    contains ``*/`` the next line is read as the declaration; the doc is
    appended to ``self.docs`` only if that declaration contains ``;`` or
    ``{``. A fresh ``Doc`` is started after every ``*/`` either way.

    Raises:
        AssertionError: If ``self.canParse`` is falsy.
    """
    assert self.canParse, "FileScanner.py: Could not open %s." % (
        self.fileName)
    doc = Doc()
    doc.category = self.categoryName
    for line in self.GetComments():
        # presumably true when the line has ' *' at index 0 -- confirm
        # against StrAtIndex.
        if self.StrAtIndex(line, ' *', 0):
            # Strip the comment leader, with or without a trailing space.
            line = line.replace(' * ', "")
            line = line.replace(' *', "")
            if '@' in line:
                # Split the line into words, then get the name of the category.
                words = line.split(' ')
                for word in words:
                    if '@' in word:
                        # Text after the last '@' becomes the category.
                        doc.category = word.split('@')[-1].capitalize()
                        break
            # Join sentences together to make one long string.
            doc.description += line
            # If a sentence doesn't have a space at the end,
            # add it to preserve grammar.
            if len(line) > 0:
                if line[-1] != ' ':
                    doc.description += ' '
        elif '/*' not in line and '*/' not in line:
            # Neither a ' *' line nor a comment opener/closer.
            # NOTE(review): the assert above reports self.fileName while the
            # warnings use self.docFileName -- confirm both are intended.
            print(
                'FileScanner.py: Warning: Line %s of %s is missing a \' *\'; Line will be skipped.'
                % (self.currentLine + 1, self.docFileName))
        if '*/' in line:
            # Check that the comment was right before a function or variable declaration or definition.
            doc.declaration = self.GetNextLine().strip()
            if ';' in doc.declaration or '{' in doc.declaration:
                doc.declaration = doc.declaration.replace(";", "").replace(
                    "{", "")
                self.docs.append(doc)
            else:
                print(
                    'FileScanner.py: Warning: Line %s of %s was skipped due to missing a declaration or definition.'
                    % (self.currentLine + 1, self.docFileName))
            # Start collecting the next comment block from scratch.
            doc = Doc()
            doc.category = self.categoryName
class TestDoc(unittest.TestCase):
    """Tests for ``Doc.parse_sentences`` and ``Doc.parse_words``."""

    def setUp(self):
        """Give every test its own ``Doc(0)`` instance."""
        self.doc = Doc(0)

    def test_parse_sentences(self):
        """An empty body yields no sentences; a single phrase yields one."""
        self.doc.body = ''
        self.assertEqual(self.doc.parse_sentences(), [])
        self.doc.body = 'I love you'
        # ``assertTrue(x, 1)`` treats ``1`` as the failure message, so it
        # never checked the count; compare the length explicitly.
        self.assertEqual(len(self.doc.parse_sentences()), 1)

    def test_parse_words(self):
        """Words are returned lower-cased, in order."""
        self.doc.body = ''
        self.assertEqual(self.doc.parse_words(), [])
        self.doc.body = 'I love you'
        self.assertEqual(self.doc.parse_words(), ['i', 'love', 'you'])

    @unittest.skip('test')
    def test_text_file(self):
        """Load ``sampleText.txt`` into the document body (skipped)."""
        with open('sampleText.txt', 'r') as inputfile:
            self.doc.body = inputfile.read()
def update(self, method):
    '''Register the entered file and list it in the text box.

    For ``method == "add"`` a document is built from ``self.entered_file``,
    tagged with the current author, genre and year, appended to
    ``self.fileL`` and written as one row into ``self.text``; the four entry
    widgets are then cleared. Any other *method* does nothing.
    '''
    if method == "add":
        document = d.Document(self.entered_file)
        document.author = self.author
        document.genre = self.genre
        document.year = self.year
        self.fileL.append(document)

        # Row layout: ten leading spaces, then the fields five spaces apart.
        fields = (document.fileName, document.genre,
                  document.author, document.year)
        row = ' ' * 10 + (' ' * 5).join(str(field) for field in fields) + '\n'
        self.text.insert(INSERT, row)

        # Blank every entry widget.
        for entry in (self.fileEntry, self.AuthorEnt,
                      self.YearEnt, self.GenreEnt):
            entry.delete(0, END)
def processCollection(self):
    """ Process documents collection of the system.

    Refreshes ``self.docNames`` and ``self.docNum`` from ``getDocList()``
    and returns whatever ``Doc.processDocs`` produces for those names,
    using the ``location`` and ``srcType`` entries of ``self.docInfo``.
    """
    # Get list of document name.
    self.docNames = self.getDocList()
    self.docNum = len(self.docNames)

    if DEBUG:
        # Single-argument parenthesised form prints identically on
        # Python 2 and is valid syntax on Python 3.
        print('document list:')
        print(self.docNames)

    return Doc.processDocs(self.docNames, self.docInfo['location'],
                           self.docInfo['srcType'])
def predict(self):
    '''Predict the author of the entered file with the selected method.

    ``self.statMethod`` ('1'..'9') selects which model is evaluated. The
    first element of the model output is stored in ``self.prediction``,
    printed, set as the document's author, and the document is appended to
    ``self.fileL`` and listed in ``self.text``. The three prediction entry
    widgets are cleared afterwards.

    Raises:
        ValueError: If ``self.statMethod`` is not one of '1'..'9'.
    '''
    file = d.Document(self.entered_file)
    file.genre = self.genre
    file.year = self.year

    method = self.statMethod
    if method == '1':
        data = [[None, f.assignGenre(file)]]
        pred = self.skGenre.eval(data)
    elif method == '2':
        data = [[None, f.assignYear(file)]]
        pred = self.skYear.eval(data)
    elif method == '3':
        data = [[None, f.assignGenre(file)]]
        pred = self.idGenre.evaluate(data)[0]
    elif method == '4':
        data = [[None, f.assignYear(file)]]
        pred = self.idYear.evaluate(data)[0]
    elif method == '5':
        data = f.predData(file, self.skTop.labels)
        pred = self.skTop.eval(data)
    elif method == '6':
        data = f.predData(file, self.skBottom.labels)
        pred = self.skBottom.eval(data)
    elif method == '7':
        data = f.predData(file, self.idTop.columns)
        pred = self.idTop.evaluate(data)[0]
    elif method == '8':
        data = f.predData(file, self.idBottom.columns)
        pred = self.idBottom.evaluate(data)[0]
    elif method == '9':
        newfile = self.applyPredFilt(file)
        data = f.predPCA(newfile, self.pca.labels)
        # Here the model yields an index into the known files.
        ind = self.pca.eval(data)
        pred = [self.fileL[ind].author]
    else:
        # Previously ``pred`` stayed unbound and the method failed below
        # with an UnboundLocalError; fail with a clear message instead.
        raise ValueError('Unknown prediction method: ' + repr(method))

    self.prediction = pred[0]
    print('The predicted author is: ' + str(self.prediction))
    file.author = pred[0]
    self.fileL.append(file)
    self.text.insert(
        INSERT, ' ' * 10 + str(file.fileName) + ' ' * 5 + str(file.genre) +
        ' ' * 5 + str(file.author) + ' ' * 5 + str(file.year) + '\n')
    self.predictfileEntry.delete(0, END)
    self.predictyearEntry.delete(0, END)
    self.predictgenreEntry.delete(0, END)
def update(self, method):
    """Plot the ten most frequent word counts of the entered file.

    Args:
        method: ``'reset'`` clears the current file label and entry;
            ``'bar'`` / ``'scatter'`` draw the matching plot and remember
            the file in ``self.file``. Other values compute the statistics
            but draw nothing.

    A ``ds.DocumentStreamError`` raised while processing is reported by
    printing its ``data``; the label and entry are refreshed regardless.
    """
    if method == 'reset':
        self.file = ''
        self.file_label_text.set(self.file)
        self.entry.delete(0, END)
        return

    try:
        doc = d.Document(self.entered_file)
        doc.generateWhole()

        words = []
        for sent in doc.getSentences():
            text = sent.string
            # Drop one trailing non-letter (typically punctuation). The
            # emptiness check avoids an IndexError on an empty sentence.
            if text and not text[-1].isalpha():
                text = text[:-1]
            words.extend(token.lower() for token in text.split())

        stats = b.BasicStats()
        stats.dic = b.BasicStats.createFreqMap(words)
        top = stats.topN(10)
        # Counts only, highest first.
        num = sorted((top[key] for key in top), reverse=True)

        if method in ("bar", "scatter"):
            plot = Plotter(num)
            if method == "bar":
                plot.barGraph()
            else:
                plot.scatterPlot()
            self.file = self.entered_file
    except ds.DocumentStreamError as E:
        print(E.data)

    self.file_label_text.set(self.file)
    self.entry.delete(0, END)
class QDataViewer(QWidget):
    """Widget for viewing and editing the sentences of a loaded ``Doc``.

    One sentence at a time is shown in an editable table (the ten
    ID..MISC columns plus optional per-feature columns), validation output
    is shown in a read-only text box, and HTML produced by
    ``process_document`` is shown in a web view. A free-text note per
    sentence is kept in a ``notes-<file>.txt`` side file.

    ``self.first_time`` acts as a re-entrancy guard: it is set to True while
    the table is being repopulated so that ``handle_change`` and ``reset``
    skip their save/refresh step.
    """

    def __init__(self):
        """Create the initial window containing only the load button."""
        QWidget.__init__(self)
        # Layout Init.
        # Language passed to get_errors; overridable by the first CLI argument.
        self.language = 'ud'
        if len(sys.argv)>1:
            self.language = sys.argv[1]
        self.setGeometry(650, 300, 600, 600)
        self.setWindowTitle('Data Viewer')
        self.uploadButton = QPushButton('Load Conll File', self)
        self.sentence_id = 0
        self.column_number = 10
        self.columns = ["ID", "FORM", "LEMMA", "UPOS", "XPOS", "FEATS", "HEAD", "DEPREL", "DEPS", "MISC"]
        self.current_dict = {}
        self.load_finished = True
        self.first_time = True
        self.session_start = True
        # Table column index -> column name; rebuilt by cb_change.
        self.map_col = {0:"ID", 1:"FORM", 2:"LEMMA", 3:"UPOS", 4:"XPOS", 5:"FEATS", 6:"HEAD", 7:"DEPREL", 8:"DEPS", 9:"MISC", 10:"Abbr", 11:"Animacy", 12:"Aspect", 13:"Case", 14:"Clusivity", 15:"Definite", 16:"Degree", 17:"Echo", 18:"Evident", 19:"Foreign", 20:"Gender", 21:"Mood", 22:"NounClass", 23:"Number", 24:"Number[psor]", 25:"NumType", 26:"Person", 27:"Person[psor]", 28:"Polarity", 29:"Polite", 30:"Poss", 31:"PronType", 32:"Reflex", 33:"Register", 34:"Tense", 35:"VerbForm", 36:"Voice"}
        self.doc = None
        self.vBoxLayout = QVBoxLayout()
        self.vBoxLayout.addWidget(self.uploadButton)
        self.setLayout(self.vBoxLayout)
        # Signal Init.
        self.connect(self.uploadButton, QtCore.SIGNAL('clicked()'), self.open)

    def open(self):
        """Let the user pick a file, load it as a ``Doc`` and build the UI."""
        filename = QFileDialog.getOpenFileName(self, 'Open File', '.')[0]
        # Notes file is named after the chosen file's base name.
        self.notename = "notes-"+filename.split("/")[-1].split(".")[0]+".txt"
        self.uploadButton.hide()
        print(filename)
        self.doc = Doc(filename)
        if not os.path.exists(self.notename):
            # Create an empty notes file. Inside a method the bare name
            # ``open`` is the builtin, not this method.
            open(self.notename, "w").close()
        self.construct()

    def writeNotes(self):
        """Store the note box text for the current sentence and save all notes.

        Nothing is done while the box still shows the placeholder text. An
        empty box removes the current sentence's note; otherwise the text is
        stored with line breaks replaced by spaces. The notes file is then
        rewritten as ``<id> --- <note>`` lines.
        """
        if self.qTextEdit2.toPlainText() != "Write your note here...":
            if self.qTextEdit2.toPlainText() == "":
                if str(self.sentence_id) in self.noteDictionary:
                    del self.noteDictionary[str(self.sentence_id)]
            else:
                self.noteDictionary[str(self.sentence_id)] = self.qTextEdit2.toPlainText().rstrip().replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
            noteTxt = open(self.notename, "w")
            # Keys are strings, so this ordering is lexicographic ("10" < "2").
            for noteKey in sorted(self.noteDictionary.keys()):
                noteTxt.write(noteKey+" --- "+self.noteDictionary[noteKey]+"\n")
            noteTxt.close()

    def go_prev(self):
        """Save the note and show the previous sentence (stops at index 0)."""
        self.first_time = True
        self.writeNotes()
        if self.sentence_id>0:
            self.sentence_id-=1
        self.update_table()
        # Forces update_html to take a new backup for reset().
        self.session_start = True
        self.update_html()
        self.check_errors()
        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False

    def go_next(self):
        """Save the note and show the next sentence (stops at the last one)."""
        self.first_time = True
        self.writeNotes()
        if self.sentence_id<len(self.doc.sentences)-1:
            self.sentence_id+=1
        self.update_table()
        self.session_start = True
        self.update_html()
        self.check_errors()
        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False

    def go(self):
        """Write the document, then jump to the sentence index typed by the user.

        Any exception raised while parsing the index or refreshing the views
        is printed rather than propagated.
        """
        self.doc.write()
        self.first_time = True
        self.writeNotes()
        try:
            self.sentence_id = int(self.qTextEdit.toPlainText())
            self.update_table()
            self.session_start = True
            self.update_html()
            self.check_errors()
        except Exception as e:
            print(e)
        # NOTE(review): sentence_id is assigned before update_table runs, so
        # an out-of-range number stays in self.sentence_id after the error.
        self.qTextEdit.setText(str(self.sentence_id))
        self.first_time = False

    def reset(self):
        """Restore the current sentence from the backup taken by update_html."""
        if not self.first_time:
            self.first_time = True
            self.sentence = copy.deepcopy(self.sentence_backup)
            self.doc.sentences[self.sentence_id] = copy.deepcopy(self.sentence_backup)
            self.session_start = True
            self.doc.write()
            self.update_table()
            self.update_html()
            self.check_errors()
            self.first_time = False

    def construct(self):
        """Build the editor UI, load saved notes and show the first sentence."""
        # --- Navigation / row-editing toolbar ---
        self.hBoxLayout = QHBoxLayout()
        self.prevButton = QPushButton("Prev", self)
        self.prevButton.setShortcut("Alt+O")
        self.resetButton = QPushButton("Reset", self)
        self.resetButton.setShortcut("Alt+R")
        self.qTextEditAddRow = QTextEdit()
        self.qTextEditAddRow.setFixedHeight(20)
        self.qTextEditAddRow.setFixedWidth(60)
        self.qTextEditDeleteRow = QTextEdit()
        self.qTextEditDeleteRow.setFixedHeight(20)
        self.qTextEditDeleteRow.setFixedWidth(60)
        # Sentence-number box used by go().
        self.qTextEdit = QTextEdit()
        self.qTextEdit.setFixedHeight(20)
        self.qTextEdit.setFixedWidth(60)
        # Note box for the current sentence.
        self.qTextEdit2 = QTextEdit()
        self.qTextEdit2.setFixedHeight(20)
        self.qTextEdit2.setFixedWidth(500)
        self.shortcutText=QShortcut(QtGui.QKeySequence("Alt+M"), self)
        self.shortcutText.activated.connect(self.qTextEdit2.setFocus)
        self.goButton = QPushButton("Go", self)
        self.goButton.setShortcut("Alt+G")
        self.nextButton = QPushButton("Next", self)
        self.nextButton.setShortcut("Alt+P")
        self.addRowButton = QPushButton("Add Row", self)
        self.deleteRowButton = QPushButton("Delete Row", self)
        self.hBoxLayout.addWidget(self.prevButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.resetButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEditAddRow)
        self.hBoxLayout.addWidget(self.addRowButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEditDeleteRow)
        self.hBoxLayout.addWidget(self.deleteRowButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEdit)
        self.hBoxLayout.addWidget(self.goButton)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.qTextEdit2)
        self.hBoxLayout.addStretch()
        self.hBoxLayout.addWidget(self.nextButton)
        self.vBoxLayout.addLayout(self.hBoxLayout)
        # --- Three rows of column-visibility checkboxes ---
        self.chBoxLayout = QHBoxLayout()
        self.chBoxLayout.addStretch()
        cb_ids = ["ID", "FORM", "LEMMA", "UPOS", "XPOS", "FEATS", "HEAD", "DEPREL", "DEPS", "MISC"]
        cb_ids2 = ["Abbr", "Animacy", "Aspect", "Case", "Clusivity", "Definite", "Degree", "Echo", "Evident", "Foreign", "Gender", "Mood", "NounClass", "Number"]
        cb_ids3 = ["Number[psor]", "NumType", "Person", "Person[psor]", "Polarity", "Polite", "Poss", "PronType", "Reflex", "Register", "Tense", "VerbForm", "Voice"]
        # The ten base columns start checked; the feature columns do not.
        for cb_id in cb_ids:
            cb = QCheckBox(cb_id)
            cb.setChecked(True)
            cb.stateChanged.connect(self.cb_change)
            self.chBoxLayout.addWidget(cb)
        self.chBoxLayout.addStretch()
        self.vBoxLayout.addLayout(self.chBoxLayout)
        self.chBoxLayout_2 = QHBoxLayout()
        self.chBoxLayout_2.addStretch()
        for cb_id in cb_ids2:
            cb = QCheckBox(cb_id)
            cb.setChecked(False)
            cb.stateChanged.connect(self.cb_change)
            self.chBoxLayout_2.addWidget(cb)
        self.chBoxLayout_2.addStretch()
        self.vBoxLayout.addLayout(self.chBoxLayout_2)
        self.chBoxLayout_3 = QHBoxLayout()
        self.chBoxLayout_3.addStretch()
        for cb_id in cb_ids3:
            cb = QCheckBox(cb_id)
            cb.setChecked(False)
            cb.stateChanged.connect(self.cb_change)
            self.chBoxLayout_3.addWidget(cb)
        self.chBoxLayout_3.addStretch()
        self.vBoxLayout.addLayout(self.chBoxLayout_3)
        self.qTextEdit.setText(str(self.sentence_id))
        # --- Load saved notes ("<id> --- <note>" per line) ---
        self.noteDictionary = {}
        noteFile = open(self.notename, "r")
        for note in noteFile:
            # NOTE(review): a line without " --- " makes noteSplitted[1]
            # raise IndexError.
            noteSplitted = note.split(" --- ")
            noteID = noteSplitted[0]
            noteContent = noteSplitted[1].rstrip()
            self.noteDictionary[noteID] = noteContent
        noteFile.close()
        self.connect(self.prevButton, QtCore.SIGNAL('clicked()'), self.go_prev)
        self.connect(self.resetButton, QtCore.SIGNAL('clicked()'), self.reset)
        self.connect(self.goButton, QtCore.SIGNAL('clicked()'), self.go)
        self.connect(self.nextButton, QtCore.SIGNAL('clicked()'), self.go_next)
        self.connect(self.addRowButton, QtCore.SIGNAL('clicked()'), self.add_row)
        self.connect(self.deleteRowButton, QtCore.SIGNAL('clicked()'), self.delete_row)
        # create table here
        self.tableWidget = QTableWidget(self)
        self.tableWidget.itemChanged.connect(self.handle_change)
        # Clicking a row's vertical header splits/merges that word (agg).
        self.connect(self.tableWidget.verticalHeader(), QtCore.SIGNAL("sectionClicked(int)"), self.agg)
        self.qTextEditError = QTextEdit()
        self.qTextEditError.setReadOnly(True)
        self.splitter = QSplitter(Qt.Vertical)
        self.splitter.addWidget(self.tableWidget)
        self.splitter.addWidget(self.qTextEditError)
        self.vBoxLayout.addWidget(self.splitter)
        self.webView = QWebEngineView()
        # First render; load_finished is still True here, so update_html does
        # not touch splitter2, which is only created below.
        self.update_table()
        self.update_html()
        self.check_errors()
        self.splitter2 = QSplitter(Qt.Vertical)
        self.splitter2.addWidget(self.splitter)
        self.splitter2.addWidget(self.webView)
        self.vBoxLayout.addWidget(self.splitter2)
        self.webView.loadFinished.connect(self.finito)
        self.first_time = False

    def finito(self):
        """Slot for the web view's ``loadFinished`` signal."""
        self.load_finished = True

    def add_row(self):
        """Insert a blank word before the word whose id is in the add-row box.

        Ignored when the box contains "-" or when the id is either end of a
        multi-word range. Ids and heads at or after the insertion point are
        shifted up by one.
        """
        if "-" not in self.qTextEditAddRow.toPlainText():
            # int() raises ValueError on non-numeric input; not caught here.
            word_id = int(self.qTextEditAddRow.toPlainText())
            possible_move = True
            new_sentence_words = []
            # Refuse to insert at either boundary of an "a-b" range word.
            for word in self.sentence.words:
                if word.unitword:
                    x1 = int(word.id.split("-")[0])
                    x2 = int(word.id.split("-")[1])
                    if word_id == x1 or word_id == x2:
                        possible_move = False
            if possible_move:
                for word in self.sentence.words:
                    new_word = copy.deepcopy(word)
                    if new_word.head != "_" and int(new_word.head) >= word_id:
                        new_word.head = str(int(new_word.head) + 1)
                    # Range words are positioned by their first id.
                    if new_word.unitword:
                        new_word_id = int(new_word.id.split("-")[0])
                    else:
                        new_word_id = int(new_word.id)
                    if new_word_id < word_id:
                        new_sentence_words.append(new_word)
                    elif new_word_id == word_id:
                        # New word takes this id, copies form and head, and
                        # the existing word moves up by one.
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            w = Word("\t".join( [str(x1), new_word.form, "_", "_", "_", "_", new_word.head, "_", "_", "_"]), self.sentence.sent_address)
                            new_word.id = str(x1 + 1) + "-" + str(x2 + 1)
                        else:
                            w = Word("\t".join( [new_word.id, new_word.form, "_", "_", "_", "_", new_word.head, "_", "_", "_"]), self.sentence.sent_address)
                            new_word.id = str(int(new_word.id) + 1)
                        new_sentence_words.append(w)
                        new_sentence_words.append(new_word)
                    elif new_word_id > word_id:
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            new_word.id = str(x1 + 1) + "-" + str(x2 + 1)
                        else:
                            new_word.id = str(int(new_word.id) + 1)
                        new_sentence_words.append(new_word)
                self.sentence.words = copy.deepcopy(new_sentence_words)
                self.first_time = True
                self.update_table()
                self.update_html()
                self.first_time = False

    def delete_row(self):
        """Delete the word whose id is in the delete-row box.

        Ignored when the box contains "-", when the id is either end of a
        multi-word range, or when another word has it as head. Later ids and
        heads are shifted down by one.
        """
        if "-" not in self.qTextEditDeleteRow.toPlainText():
            word_id = int(self.qTextEditDeleteRow.toPlainText())
            possible_move = True
            new_sentence_words = []
            for word in self.sentence.words:
                if word.unitword:
                    x1 = int(word.id.split("-")[0])
                    x2 = int(word.id.split("-")[1])
                    if word_id == x1 or word_id == x2:
                        possible_move = False
                # A word that is some other word's head cannot be removed.
                if not word.head == "_":
                    if int(word.head) == word_id:
                        possible_move = False
            if possible_move:
                for word in self.sentence.words:
                    new_word = copy.deepcopy(word)
                    if new_word.head != "_" and int(new_word.head) >= word_id:
                        new_word.head = str(int(new_word.head) - 1)
                    if new_word.unitword:
                        new_word_id = int(new_word.id.split("-")[0])
                    else:
                        new_word_id = int(new_word.id)
                    # The word with id == word_id is dropped by omission.
                    if new_word_id < word_id:
                        new_sentence_words.append(new_word)
                    elif new_word_id > word_id:
                        if new_word.unitword:
                            x1 = int(new_word.id.split("-")[0])
                            x2 = int(new_word.id.split("-")[1])
                            new_word.id = str(x1 - 1) + "-" + str(x2 - 1)
                        else:
                            new_word.id = str(int(new_word.id) - 1)
                        new_sentence_words.append(new_word)
                self.sentence.words = copy.deepcopy(new_sentence_words)
                self.first_time = True
                self.update_table()
                self.update_html()
                self.first_time = False

    def agg(self, x):
        """Toggle row *x* between a range word and a single word.

        Args:
            x: Row index into ``self.sentence.words`` (from the header click).

        A range word ("a-b") is collapsed into one word that takes the
        annotation of the row after it, and the two following rows are
        removed. A single word is turned into a range word followed by two
        copies of itself.
        """
        if self.sentence.words[x].unitword:#remove two-words thing into one
            limit = int(self.sentence.words[x].id.split("-")[0])
            # Inherit the annotation of the first component (row x+1).
            self.sentence.words[x].head = self.sentence.words[x+1].head
            self.sentence.words[x].lemma = self.sentence.words[x+1].lemma
            self.sentence.words[x].upos = self.sentence.words[x+1].upos
            self.sentence.words[x].xpos = self.sentence.words[x+1].xpos
            self.sentence.words[x].feats = self.sentence.words[x+1].feats
            self.sentence.words[x].deprel = self.sentence.words[x+1].deprel
            self.sentence.words[x].deps = self.sentence.words[x+1].deps
            self.sentence.words[x].misc = self.sentence.words[x+1].misc
            self.sentence.words[x].id = str(limit)
            self.sentence.words[x].unitword = False
            # Second del removes what was originally row x+2.
            del self.sentence.words[x+1]
            del self.sentence.words[x+1]
            # Two rows became one: shift later ids and heads down by one.
            for word in self.sentence.words:
                if word.unitword:
                    first_word_id = int(word.id.split("-")[0])
                    if first_word_id>limit:
                        word.id = str(first_word_id-1)+"-"+str(first_word_id)
                else:
                    if int(word.id) > limit:
                        word.id = str(int(word.id)-1)
                if word.head != "_" and int(word.head) > limit:
                    word.head = str(int(word.head)-1)
            self.first_time = True
            self.update_table()
            self.update_html()
            self.first_time = False
        else:#add two-elements below
            base_word = self.sentence.words[x]
            limit = int(base_word.id)
            # One row becomes two: shift later ids and heads up by one.
            for word in self.sentence.words:
                if word.unitword:
                    first_word_id = int(word.id.split("-")[0])
                    if first_word_id>limit:
                        word.id = str(first_word_id+1)+"-"+str(first_word_id+2)
                else:
                    if int(word.id) > limit:
                        word.id = str(int(word.id)+1)
                if word.head != "_" and int(word.head) > limit:
                    word.head = str(int(word.head)+1)
            # Both components copy the base word; the second is headed by
            # the first. MISC is reset to "_" on both.
            w1 = Word("\t".join([str(limit), base_word.form, base_word.lemma, base_word.upos, base_word.xpos, base_word.feats, base_word.head, base_word.deprel, base_word.deps, "_"]), self.sentence.sent_address)
            w2 = Word("\t".join([str(limit+1), base_word.form, base_word.lemma, base_word.upos, base_word.xpos, base_word.feats, str(limit), base_word.deprel, base_word.deps, "_"]), self.sentence.sent_address)
            self.sentence.words = self.sentence.words[:x+1]+[w1, w2]+self.sentence.words[x+1:]
            # The clicked row becomes the range line with blank annotation.
            base_word.id = str(limit)+"-"+str(limit+1)
            base_word.lemma = "_"
            base_word.upos = "_"
            base_word.xpos = "_"
            base_word.feats = "_"
            base_word.head = "_"
            base_word.deprel = "_"
            base_word.deps = "_"
            base_word.unitword = True
            self.first_time = True
            self.update_table()
            self.update_html()
            self.first_time = False

    def update_table(self):
        """Show the current sentence's note and fill the table from its words."""
        if str(self.sentence_id) in self.noteDictionary:
            self.qTextEdit2.setText(self.noteDictionary[str(self.sentence_id)])
        else:
            self.qTextEdit2.setText("Write your note here...")
        self.sentence = self.doc.sentences[self.sentence_id]
        self.tableWidget.setRowCount(len(self.sentence.words))
        self.tableWidget.setColumnCount(self.column_number)
        self.tableWidget.setHorizontalHeaderLabels(self.columns)
        for enum, word in enumerate(self.sentence.words):
            # Header shows the action agg() would take: "-" merge, "+" split.
            if word.unitword:
                self.tableWidget.setVerticalHeaderItem(enum, QTableWidgetItem("-"))
            else:
                self.tableWidget.setVerticalHeaderItem(enum, QTableWidgetItem("+"))
            # Break "A=x|B=y" into {"A": "x", "B": "y"} for feature columns.
            # NOTE(review): '\|' and '\=' are non-raw strings with backslash
            # escapes; raw strings would avoid invalid-escape warnings.
            dict_feat = {}
            uni_feats = re.split('\|', word.feats)
            if uni_feats[0] != "_":
                for uni_feat in uni_feats:
                    uf = re.split('\=', uni_feat)
                    dict_feat[uf[0]]=uf[1]
            for i in range(self.column_number):
                if self.columns[i]=="ID":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.id))
                elif self.columns[i]=="FORM":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.form))
                elif self.columns[i]=="LEMMA":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.lemma))
                elif self.columns[i]=="UPOS":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.upos))
                elif self.columns[i]=="XPOS":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.xpos))
                elif self.columns[i]=="FEATS":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.feats))
                elif self.columns[i]=="HEAD":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.head))
                elif self.columns[i]=="DEPREL":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.deprel))
                elif self.columns[i]=="DEPS":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.deps))
                elif self.columns[i]=="MISC":
                    self.tableWidget.setItem(enum, i, QTableWidgetItem(word.misc))
                else:
                    # Per-feature column: show its value, or "_" when absent.
                    if self.columns[i] in dict_feat:
                        self.tableWidget.setItem(enum, i, QTableWidgetItem(dict_feat[self.columns[i]]))
                    else:
                        self.tableWidget.setItem(enum, i, QTableWidgetItem("_"))
        self.tableWidget.resizeColumnsToContents()

    def check_errors(self):
        """Show a ``form(id)`` index line plus any errors from ``get_errors``."""
        index = ""
        for w in self.sentence.words:
            index += w.form + "(" + w.id + ") "
        index += "\n"
        error_list = get_errors(self.sentence.get_raw(), self.sentence.sent_id, self.language)
        if len(error_list)>0:
            error_raw_string = 'ERRORS:\n'
            for error in error_list:
                error_raw_string+=error+'\n'
            self.qTextEditError.setText(index + error_raw_string)
        else:
            self.qTextEditError.setText(index)

    def update_html(self):
        """Render the current sentence in the web view.

        If the previous load never signalled completion, the web view and
        its splitter are rebuilt first. At the start of an editing session a
        deep copy of the sentence is kept for ``reset``.
        """
        if not self.load_finished: #If the js function not loaded an image onto app it removes browser
            print("Load error!")
            self.splitter2.deleteLater()
            self.webView = QWebEngineView()
            self.splitter2 = QSplitter(Qt.Vertical)
            self.splitter2.addWidget(self.splitter)
            self.splitter2.addWidget(self.webView)
            self.vBoxLayout.addWidget(self.splitter2)
            self.webView.loadFinished.connect(self.finito)
        self.sentence = self.doc.sentences[self.sentence_id]
        if self.session_start:
            self.sentence_backup = copy.deepcopy(self.doc.sentences[self.sentence_id])
            self.session_start = False
        html = process_document(self.sentence)
        self.webView.setHtml(html)
        # Set back to True by finito() when loadFinished fires.
        self.load_finished = False

    def cb_change(self):
        """Rebuild the visible-column list from the checkboxes and redraw."""
        self.column_number = 0
        self.columns = []
        self.map_col = {}
        # x is the running table column index across all three rows.
        x = 0
        for i in range(self.chBoxLayout.count()):
            # Skip layout items that are not widgets (the stretches).
            if isinstance(self.chBoxLayout.itemAt(i), QWidgetItem):
                wid = self.chBoxLayout.itemAt(i).widget()
                if wid.isChecked():
                    self.columns.append(wid.text())
                    self.column_number += 1
                    self.map_col[x] = wid.text()
                    x+=1
        for i in range(self.chBoxLayout_2.count()):
            if isinstance(self.chBoxLayout_2.itemAt(i), QWidgetItem):
                wid = self.chBoxLayout_2.itemAt(i).widget()
                if wid.isChecked():
                    self.columns.append(wid.text())
                    self.column_number += 1
                    self.map_col[x] = wid.text()
                    x+=1
        for i in range(self.chBoxLayout_3.count()):
            if isinstance(self.chBoxLayout_3.itemAt(i), QWidgetItem):
                wid = self.chBoxLayout_3.itemAt(i).widget()
                if wid.isChecked():
                    self.columns.append(wid.text())
                    self.column_number += 1
                    self.map_col[x] = wid.text()
                    x+=1
        self.first_time = True
        self.update_table()
        self.first_time = False

    def handle_change(self, item):
        """Apply an edited table cell to the current sentence.

        Args:
            item: The changed table item; its row selects the word and its
                column (via ``self.map_col``) selects the field.

        Base columns are written straight to the word (UPOS upper-cased).
        Feature columns add, replace or remove ``Name=value`` inside
        ``feats``. Unless a programmatic refresh is in progress
        (``self.first_time``), the document and notes are saved and all
        views refreshed.
        """
        col = self.map_col[item.column()]
        text = item.text()
        #print(text)
        # isSpace marks "feature cell was cleared" -> remove the feature.
        isSpace = False
        if text == "":
            if col!="ID" and col!="FORM" and col!="LEMMA" and col!="UPOS" and col!="XPOS" and col!="HEAD" and col!="DEPREL" and col!="DEPS" and col!="MISC":
                isSpace = True
            text = "_"
        row = item.row()
        self.sentence = self.doc.sentences[self.sentence_id]
        if col=="ID":
            self.sentence.words[row].id = text
        elif col=="FORM":
            self.sentence.words[row].form = text
        elif col=="LEMMA":
            self.sentence.words[row].lemma = text
        elif col=="UPOS":
            self.sentence.words[row].upos = text.upper()
        elif col=="XPOS":
            self.sentence.words[row].xpos = text
        elif col=="FEATS":
            self.sentence.words[row].feats = text
        elif col=="HEAD":
            self.sentence.words[row].head = text
        elif col=="DEPREL":
            self.sentence.words[row].deprel = text
        elif col=="DEPS":
            self.sentence.words[row].deps = text
        elif col=="MISC":
            self.sentence.words[row].misc = text
        else:
            # cur_col is col escaped for use inside a regular expression.
            cur_col = col
            if col=="Number[psor]":
                cur_col = "Number\[psor\]"
            if col=="Person[psor]":
                cur_col = "Person\[psor\]"
            if re.search(cur_col+'=\w*', self.sentence.words[row].feats) is None:
                # Feature not present yet: add it unless the value is "_".
                if text!="_":
                    if self.sentence.words[row].feats=="_":
                        self.sentence.words[row].feats = col+"="+text
                    else:
                        # Find the last feature whose name sorts before col
                        # (case-insensitive) and insert right after it.
                        sorted_feats = re.split('\|', self.sentence.words[row].feats)
                        match_col=""
                        match_val=""
                        for sorted_feat in sorted_feats:
                            sf = re.split('\=', sorted_feat)
                            if sf[0].lower()<col.lower():
                                match_col = sf[0]
                                match_val = sf[1]
                        if match_col=="":
                            self.sentence.words[row].feats = col+"="+text+"|"+self.sentence.words[row].feats
                        else:
                            cur_match_col=match_col
                            if match_col == "Number[psor]":
                                cur_match_col = "Number\[psor\]"
                            if match_col == "Person[psor]":
                                cur_match_col = "Person\[psor\]"
                            # NOTE(review): match_val is used as a regex
                            # without escaping.
                            self.sentence.words[row].feats = re.sub(cur_match_col+'='+match_val, match_col+'='+match_val+"|"+col+"="+text, self.sentence.words[row].feats)
            elif isSpace:
                # NOTE(review): the comparison uses the regex-escaped cur_col,
                # so "Number[psor]" / "Person[psor]" never match and cannot be
                # removed; removing the only feature leaves feats as "" rather
                # than "_".
                old_feats = re.split('\|', self.sentence.words[row].feats)
                new_feats = []
                for old_feat in old_feats:
                    if old_feat.split("=")[0]!=cur_col:
                        new_feats.append(old_feat)
                self.sentence.words[row].feats = "|".join(new_feats)
            else:
                # Feature already present: replace its value in place.
                self.sentence.words[row].feats = re.sub(cur_col+'=\w*', col+"="+text, self.sentence.words[row].feats)
        if not self.first_time:
            self.doc.write()
            # Guard against re-entry while update_table rewrites the cells.
            self.first_time = True
            self.writeNotes()
            self.update_table()
            self.update_html()
            self.check_errors()
            self.first_time = False
import Doc
from Trimmer import trim

# Call the sample function once, then print its docstring after passing it
# through trim().
Doc.funkcja(1, 2, 3)
docstring_text = Doc.funkcja.__doc__
print(trim(docstring_text))
def main():
    """Time dictionary-based versus heap-based top-100 word extraction.

    Prompts for a file name, collects the document's words lower-cased, and
    prints how long ``createFreqMap`` + ``topN(100)`` and ``createFreqMap`` +
    ``topNHeap(100)`` each take, in seconds. A ``ds.DocumentStreamError``
    raised along the way is reported by printing its ``data``.
    """
    file = input('Please enter a file: ')
    try:
        # converts the file into a document object
        doc = d.Document(file)
        doc.generateWhole()

        # makes a new list of all of the words in the file, removing one
        # trailing non-letter per sentence and making everything lowercase,
        # so that they will all be counted as the same word
        words = []
        for sent in doc.getSentences():
            text = sent.string
            # The emptiness check avoids an IndexError on an empty sentence.
            if text and not text[-1].isalpha():
                text = text[:-1]
            words.extend(token.lower() for token in text.split())

        stats = b.BasicStats()

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start1 = time.perf_counter()
        stats.dic = b.BasicStats.createFreqMap(words)
        stats.topN(100)
        end1 = time.perf_counter()
        # The difference is in seconds; it was previously labelled "us.".
        print('Dictionary: {0:.5f} s.'.format(end1 - start1))
        # Original author's analysis: the run time of this is n + 2m where n
        # is the number of words in the document and m is the length of the
        # dictionary. createFreqMap has a run time of n and topN has a
        # runtime of 2m. m <= n so n + 2m <= 3n. Therefore, this is O(n).

        start2 = time.perf_counter()
        stats.dic = b.BasicStats.createFreqMap(words)
        stats.topNHeap(100)
        end2 = time.perf_counter()
        print('Heap: {0:.5f} s.'.format(end2 - start2))
        # Original author's analysis: the run time of this is n + 2m where n
        # is the number of words in the document and m is the length of the
        # dictionary. createFreqMap has a run time of n and topNHeap has a
        # runtime of 2m. m <= n, so n + 2m <= 3n. Therefore, this is O(n).
        # On average, we would still expect the heap to perform faster than
        # the dictionary.

    # handles any exceptions that might occur from the DocumentStream class
    except ds.DocumentStreamError as E:
        print(E.data)
def setUp(self):
    """Store a new ``Doc`` built with the argument ``0`` on ``self.doc``.

    NOTE(review): presumably the unittest per-test fixture hook; the
    enclosing class is not visible here.
    """
    self.doc = Doc(0)
elif accion == "2": #crear un archivo if int(len(menu)) == 0: print("\nAún no hay carpetas registradas") else: nombre = input("Nombre del archivo que desea crear: ") tamaño = input("Tamaño del archivo que desea crear: ") tipo = input('''Tipo de archivo: 1- .DOC 2- .MOV 3- .WAV 4- .PDF ==> ''') while not (tipo == "1" or tipo == "2" or tipo == "3" or tipo == "4"): tipo = input("Ingreso inválido, seleccione un tipo de archivo correcto: ") if tipo == "1": documento = Doc(nombre, "DOC", tamaño) elif tipo == "2": documento = Mov(nombre, "MOV", tamaño) elif tipo == "3": documento = Wav(nombre, "WAV", tamaño) else: documento = Pdf(nombre, "PDF", tamaño) carpeta = input(f"\nIngrese el nombre de la carpeta donde desea guardar {documento.nombre}: ") bolean = carpeta_existente(menu, carpeta) while not bolean: carpeta = input(f"\nCarpeta no existente. Ingrese el nombre de la carpeta donde desea guardar {documento.nombre}: ") bolean = carpeta_existente(menu, carpeta) for i in menu: if i.nombre == carpeta: i.lista_archivos.append(documento)
#!python3
import os
import re

import Doc

# Directory scanned for source snippets and directory whose documents get
# the snippets substituted in.
SOURCE_ROOT = r"D:\GitHub\ezEngine\Code\Games\SampleGamePlugin"
DOCS_ROOT = "./docs"

# Filled by SearchSourceSnippets, then read by ReplaceTargetSnippets.
snippets = {}
Doc.SearchSourceSnippets(SOURCE_ROOT, snippets)
Doc.ReplaceTargetSnippets(DOCS_ROOT, snippets)
# NOTE(review): dump_top and dump take ``self`` and appear to be methods of
# the histogram class whose header lies outside this excerpt.
def dump_top(self, top):
    """Sort by counter, then return the first *top* records, one per line."""
    self.sort_by_counter()
    leading = self.histogram[:top]
    return '\n'.join([entry.print_entity() for entry in leading])


def dump(self):
    """Return every record's ``print_entity()`` text, one per line."""
    lines = [entry.print_entity() for entry in self.histogram]
    return '\n'.join(lines)


"""
Arguments : (0 - program name)
            1 - input file
            2 - output file
            3 - stopwords
"""
if __name__ == "__main__":
    # A stop-word file is used only when exactly three arguments are given.
    histogram = (WordsHistogram(stopwords=sys.argv[3])
                 if len(sys.argv) == 4 else WordsHistogram())

    with open(sys.argv[1], 'r') as inputfile:
        doc = Doc(0)
        doc.body = inputfile.read()

    histogram.accumulate(doc.parse_words())
    histogram.rank(5)
    histogram.sort_by_counter()

    with open(sys.argv[2], 'w') as outputfile:
        outputfile.write(histogram.dump())
def docNew(self):
    """Return a new ``Doc`` constructed with this object as its only argument."""
    return Doc(self)
#!python3
import os
import re

import Doc

# Directory scanned for source snippets and directory whose documents get
# the snippets substituted in.
SOURCE_ROOT = r"D:\GitHub\ezEngine\Code"
DOCS_ROOT = "./docs"

# Filled by SearchSourceSnippets, then read by ReplaceTargetSnippets.
snippets = {}
Doc.SearchSourceSnippets(SOURCE_ROOT, snippets)
Doc.ReplaceTargetSnippets(DOCS_ROOT, snippets)