def updateDatabase(self):
    """Replace this project's database records with the current run's data.

    Does nothing except show a warning when ``cfg["Database"]["Enable"]``
    is not true. Otherwise it connects, deletes any existing ``Project``
    row whose ID equals ``self.getProjectID()``, and creates fresh
    ``Project``, ``Input``, ``Analyzer``, ``Translator`` and ``Output``
    rows from ``self.cfg`` and from the files named in
    ``self.frameworkParams``.

    The connection is released in a ``finally`` clause, so a failure while
    reading files or talking to the database no longer leaves it open.
    Exceptions are not swallowed; they propagate after the disconnect.
    """
    if not Text.isTrue(self.cfg["Database"]["Enable"]):
        Msg.showWarning("Database is NOT enabled in {0}".format(
            self.cfgPath))
        return

    Database.connect()
    try:
        Database.setDebug(
            Text.toTrueOrFalse(self.cfg["Database"]["Debug"]))
        workflow = self.cfg["Workflow"]

        # NOTE(review): db_session looks like a Pony ORM session context
        # manager -- confirm against the Database module.
        with Database.ORM.db_session:
            # Drop the previous record for this project and commit right
            # away, before a row with the same ID is created again below.
            records = Database.Table.Project.get(ID=self.getProjectID())
            if records is not None:
                records.delete()
                Database.commit()

            projectTable = Database.Table.Project(
                ID=self.getProjectID(),
                Title=Database.sanitize(self.cfg["Title"]),
                Description=Database.sanitize(self.cfg["Description"]),
                DateTime=self.frameworkParams["dateTime"],
                Workflow=workflow)

            # The entities created below are never read again in this
            # method, so they are not bound to local names.
            Database.Table.Input(
                ProjectID=projectTable,
                Content=Database.sanitize(
                    File.getContent(self.frameworkParams["inputPath"])),
                Source=Database.sanitize(workflow["Input"]["Source"]),
                PluginName=Database.sanitize(workflow["Input"]["Plugin"]),
                PluginMethod=Database.sanitize(
                    workflow["Input"]["Method"]),
                Plugin=workflow["Input"])

            Database.Table.Analyzer(
                ProjectID=projectTable,
                Content=Database.sanitize(
                    File.getContent(self.frameworkParams["analyzerPath"])),
                PluginName=Database.sanitize(
                    workflow["Analyzer"]["Plugin"]),
                PluginMethod=Database.sanitize(
                    workflow["Analyzer"]["Method"]),
                Plugin=workflow["Analyzer"])

            # The translator content is stored both as-is and in the form
            # returned by Result.parseTranslatorContent.
            content = Database.sanitize(
                File.getContent(self.frameworkParams["translatorPath"]))
            Database.Table.Translator(
                ProjectID=projectTable,
                Content=content,
                ContentParsed=Result.parseTranslatorContent(content),
                PluginName=Database.sanitize(
                    workflow["Translator"]["Plugin"]),
                PluginMethod=Database.sanitize(
                    workflow["Translator"]["Method"]),
                Plugin=workflow["Translator"])

            Database.Table.Output(
                ProjectID=projectTable,
                Content=Database.sanitize(
                    File.getContent(self.frameworkParams["outputPath"])),
                Target=Database.sanitize(workflow["Output"]["Target"]),
                PluginName=Database.sanitize(workflow["Output"]["Plugin"]),
                PluginMethod=Database.sanitize(
                    workflow["Output"]["Method"]),
                Plugin=workflow["Output"])
    finally:
        # Always release the connection, including when an error occurred.
        Database.disconnect()
class Article:
    """Hold the text of one article and turn its keywords into wiki links.

    Attributes are set in ``__init__``: ``title`` and ``body`` (the article
    text, which may contain HTML), ``keyword`` (the list of keywords used
    as link targets), ``table_name`` and ``conn`` (a ``Database`` opened
    on ``file_name``) for persistence.
    """

    def __init__(self, title, body, file_name='', table_name=''):
        """Store the article text and open the database connection.

        Args:
            title: Article title.
            body: Article body.
            file_name: Passed unchanged to ``Database``.
            table_name: Table used by ``save`` (offline support).
        """
        self.title = title
        self.body = body
        # Keywords that will be used as links.
        self.keyword = []
        # Table name for offline support.
        self.table_name = table_name
        self.conn = Database(file_name)

    def extract_keyword(self):
        """Extract keywords from the cleaned body with Rake.

        The ranked phrases replace ``self.keyword``. Rake is configured
        with ``min_length=1, max_length=1``.
        """
        # Use Beautiful Soup to get the plain text of the article.
        clean = self.get_clean_body()
        # Extract the keywords with Rake.
        r = Rake(min_length=1, max_length=1)
        r.extract_keywords_from_text(clean)
        self.keyword = r.get_ranked_phrases()
        # Debug output on the terminal showing the number of keywords.
        print('Keyword extracted with ' + str(len(self.keyword)) + ' words')

    def get_clean_body(self):
        """Return the body text as extracted by Beautiful Soup (lxml)."""
        return BeautifulSoup(self.body, 'lxml').text

    def get_wiki(self, titles_path='titles.txt'):
        """Keep only keywords that are Wikipedia titles and link them.

        Each line of ``titles_path`` is read as one title and lower-cased.
        Keywords not found among those titles are removed from
        ``self.keyword``. For every remaining keyword, each occurrence
        surrounded by single spaces in ``self.body`` is wrapped in an
        ``<a>`` tag pointing at ``https://en.wikipedia.org/wiki/<keyword>``.

        Args:
            titles_path: Path of the titles file, one title per line. The
                default keeps the previous behavior of reading
                ``titles.txt`` from the current working directory. The
                file can be obtained from Kaggle.
        """
        # A set gives constant-time membership tests. rstrip('\n') removes
        # only the line terminator, so a last line without a trailing
        # newline keeps its final character.
        with open(titles_path) as titles_file:
            titles = {line.rstrip('\n').lower() for line in titles_file}

        # Filter in one pass instead of removing while iterating, which
        # skipped the element after every removal. Slice assignment keeps
        # the same list object, as the old in-place removal did.
        self.keyword[:] = [k for k in self.keyword if k in titles]

        for k in self.keyword:
            self.body = self.body.replace(
                ' ' + k + ' ',
                ' <a href="https://en.wikipedia.org/wiki/' + k + '">'
                + k + '</a> ')

    def get_keyword(self):
        """Return the current keyword list."""
        return self.keyword

    def get_title(self):
        """Return the article title."""
        return self.title

    def get_content(self):
        """Return the article body."""
        return self.body

    def clean_html(self):
        """Run the body through ``TagDropper`` and store the result."""
        # Drop the troublesome tags that come from the web page.
        td = TagDropper(['img', 'h4', 'svg', 'a', 'figure', 'div', 'path'])
        td.feed(self.body)
        self.body = td.get_text()

    # --- Database helpers ---

    def save_article(self):
        """Insert the title and body as a row, then commit."""
        self.save(['title', 'raw_article'], (self.title, self.body))
        self.commit()

    def save(self, column, data):
        """Insert ``data`` into ``column`` of ``self.table_name``."""
        self.conn.insert(self.table_name, column, data)

    def commit(self):
        """Commit on the underlying database connection."""
        self.conn.commit()