def __init__(self, tableName="document", copy=True):
    """Describe the document table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table
    from Context import Context

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose so that
    # only values equal to True switch to the temporary table name.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # A default Context instance supplies the table name that the
    # context_id foreign key refers to.
    referencedContext = Context()

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        document_id="serial",
        document_title="varchar(255)",
        context_id="bigint",
        document_content="text",
        document_path="text",
    )
    self.defaultPrimaryKeys = ["document_id"]
    self.defaultUnique = ["document_path"]
    # Column name -> (referenced table name, referenced column name).
    self.defaultForeignKeys = dict(
        context_id=(referencedContext.tableName, "context_id"),
    )

    self.getTriggerFunction = None
    self.Table = Table(self.tableName)
def __init__(self, tableName="input_text_keywords", copy=False):
    """Describe the input_text_keywords table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        input_text_id="bigint",
        context_id="bigint",
        keyword_id="bigint",
        keyword_position="bigint[]",
        keyword_text="text",
        phrase_id="bigint",
    )
    self.defaultPrimaryKeys = ["input_text_id", "context_id", "keyword_id"]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = [
        "input_text_word_position",
        "input_text_context_identifier",
        "input_text_phrase_count",
        "context_phrase",
        "phrase",
    ]

    # The Table handle is created before triggerFunction() runs, exactly as
    # in the original ordering; triggerFunction is defined outside this block.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="frequency_distance", copy=False):
    """Describe the frequency_distance table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        context_id="bigint",
        phrase_id="bigint",
        phrase_relative_frequency="decimal",
        phrase_distance_to_context="decimal",
        phrase_difficulty="int",
    )
    self.defaultPrimaryKeys = ["context_id", "phrase_id"]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = [
        "context_phrase",
        "phrase_vector_space",
        "phrase_distance_to_context",
    ]

    # Table handle first, then the trigger function (defined outside this
    # block), matching the original ordering.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="related_phrase", copy=False):
    """Describe the related_phrase table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        context_id="bigint",
        context_phrase_id="bigint",
        related_phrase_id="bigint",
        phrase_bonding_index="decimal",
    )
    self.defaultPrimaryKeys = [
        "context_id",
        "context_phrase_id",
        "related_phrase_id",
    ]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = [
        "context_phrase",
        "phrase",
        "context_axis",
        "phrase_origin",
        "document",
    ]

    # Table handle first, then the trigger function (defined outside this
    # block), matching the original ordering.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="input_text", copy=False):
    """Describe the input_text table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(input_text_id="serial", input_text="text")
    self.defaultPrimaryKeys = ["input_text_id"]
    self.defaultUnique = ["input_text"]

    # Neither foreign keys nor a trigger function are declared here.
    self.defaultForeignKeys = None
    self.getTriggerFunction = None

    self.Table = Table(self.tableName)
def __init__(self, tableName="context", copy=True):
    """Describe the context table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # Default columns: column name -> SQL data type string.
    self.defaultColumns = dict(
        context_id="serial",
        context_name="varchar(255)",
        parent_id="bigint NULL",
        context_children_id="bigint[] NULL",
        context_picture="varchar(255)",
        directory_level="bigint",
        context_path="text",
    )

    # Primary key columns.
    self.defaultPrimaryKeys = ["context_id"]

    # Natural key columns (the primary key is a surrogate).
    self.defaultUnique = ["context_path"]

    # Foreign keys: column name -> (referenced table, referenced column).
    # parent_id refers back to this same (already resolved) table name.
    self.defaultForeignKeys = dict(parent_id=(self.tableName, "context_id"))

    # No trigger function is declared for this table.
    self.getTriggerFunction = None

    # Table is expected to be available at module scope (it is not imported
    # inside this method).
    self.Table = Table(self.tableName)
def __init__(self, tableName="context_axis", copy=False):
    """Describe the context_axis table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        context_id="bigint",
        independent_context_id="bigint",
        axis_coordinate="int",
    )
    self.defaultPrimaryKeys = ["context_id", "independent_context_id"]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = ["context"]

    # Table is expected at module scope; the handle is created before the
    # trigger function (defined outside this block) is evaluated.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="phrase_spelling_similarity", copy=False):
    """Describe the phrase_spelling_similarity table and bind its ``Table``.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        phrase_id="bigint",
        similar_spelling_phrase_id="bigint",
        similarity_index="bigint",
    )
    self.defaultPrimaryKeys = ["phrase_id", "similar_spelling_phrase_id"]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = ["context_phrase", "phrase"]

    # Table handle first, then the trigger function (defined outside this
    # block), matching the original ordering.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="phrase", copy=False):
    """Describe the phrase table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        phrase_id="serial",
        phrase_text="varchar(255)",
        phrase_length="smallint",
        red_flag="smallint",
    )
    self.defaultPrimaryKeys = ["phrase_id"]
    self.defaultUnique = ["phrase_text"]
    self.defaultForeignKeys = None

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = ["phrase_origin"]

    # Table handle first, then the trigger function (defined outside this
    # block), matching the original ordering.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="input_text_word_position", copy=False):
    """Describe the input_text_word_position table and bind its ``Table``.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys, unique constraint or trigger function are declared.
    self.defaultForeignKeys = None
    self.defaultUnique = None
    self.getTriggerFunction = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        input_text_id="bigint",
        input_text_phrase_id="bigint",
        phrase_text="text",
        phrase_position="bigint",
        phrase_length="bigint",
        phrase_components="bigint[]",
    )
    self.defaultPrimaryKeys = ["input_text_id", "input_text_phrase_id"]

    self.Table = Table(self.tableName)
def __init__(self, tableName="shared_word", copy=False):
    """Describe the shared_word table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # No foreign keys and no extra unique constraint for this table.
    self.defaultForeignKeys = None
    self.defaultUnique = None

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        long_phrase_id="bigint",
        sibling_id="bigint",
        shared_word="varchar(255)",
        shared_word_position_in_long_phrase="bigint[]",
        shared_word_position_in_sibling="bigint[]",
    )
    self.defaultPrimaryKeys = ["long_phrase_id", "sibling_id", "shared_word"]

    # Names of the tables listed as dependencies of this one.
    self.tableDependencies = ["context_phrase", "phrase"]

    # Table handle first, then the trigger function (defined outside this
    # block), matching the original ordering.
    self.Table = Table(self.tableName)
    self.getTriggerFunction = self.triggerFunction()
def __init__(self, tableName="phrase_origin", copy=True):
    """Describe the phrase_origin table and bind its ``Table`` handle.

    Args:
        tableName: Base name of the table. ``'_temp'`` is appended when
            ``copy`` compares equal to ``True``.
        copy: Flag stored on the instance; selects the ``_temp`` name.
    """
    from contextionaryDatabase import Table
    from Document import Document

    self.copy = copy
    # Equality with True (not plain truthiness) is kept on purpose.
    self.tableName = (tableName + "_temp") if copy == True else tableName

    # A default Document instance supplies the table name that the
    # document_id foreign key refers to.
    referencedDocument = Document()

    # Column name -> SQL type, in declaration order.
    self.defaultColumns = dict(
        document_id="bigint",
        phrase_text="text",
        phrase_count_per_document="integer",
    )
    self.defaultPrimaryKeys = ["document_id", "phrase_text"]
    self.defaultUnique = None
    # Column name -> (referenced table name, referenced column name).
    self.defaultForeignKeys = dict(
        document_id=(referencedDocument.tableName, "document_id"),
    )

    self.Table = Table(self.tableName)
    self.getTriggerFunction = None
class Document(object):
    """Schema description and record operations for the document table."""

    def __init__(self, tableName="document", copy=True):
        """Describe the document table and bind its ``Table`` handle.

        Args:
            tableName: Base name of the table; ``'_temp'`` is appended when
                ``copy`` is truthy.
            copy: Flag stored on the instance; selects the ``_temp`` name.
        """
        from contextionaryDatabase import Table
        from Context import Context

        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # A default Context instance supplies the table name that the
        # context_id foreign key refers to.
        context = Context()

        # Column name -> SQL type, in declaration order. addRecord relies on
        # this order when it lines up the INSERT values.
        self.defaultColumns = {
            "document_id": "serial",
            "document_title": "varchar(255)",
            "context_id": "bigint",
            "document_content": "text",
            "document_path": "text"
        }
        self.defaultPrimaryKeys = ["document_id"]
        self.defaultUnique = ["document_path"]
        # Column name -> (referenced table name, referenced column name).
        self.defaultForeignKeys = {
            "context_id": (context.tableName, "context_id")
        }
        self.getTriggerFunction = None
        self.Table = Table(self.tableName)

    @staticmethod
    def _documentTitle(documentFilename):
        """Return the file name without its extension (if it has one)."""
        import os

        return os.path.splitext(documentFilename)[0]

    def addRecord(self, documentPath, connectDB):
        """
        Adds record to document table and phrase origin table

        Args:
            documentPath: Path of the text file to store. The directory part
                must match a ``context_path`` already present in the context
                table.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.

        Raises:
            IndexError: If no context (or, afterwards, no document) row is
                returned by the lookups.
            OSError: If the document file cannot be opened or read.
        """
        print("add document start...", documentPath)

        context = Context()
        phraseOrigin = PhraseOrigin()

        # Every column except the primary key receives a value on insert.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        # Paths are split on "/" on Linux and on "\" everywhere else.
        separator = "/" if 'Linux' in platform.platform() else "\\"
        contextPath, _, documentFilename = documentPath.rpartition(separator)
        # Strip the real extension instead of blindly dropping 4 characters.
        documentTitle = self._documentTitle(documentFilename)

        contextID = context.Table.selectColumn(
            "context_id", {"context_path": [contextPath]})
        contextID = contextID[0]

        # The context manager guarantees the handle is released even when
        # reading fails.
        with open(documentPath, "r", encoding="UTF-8-sig") as file:
            documentContent = file.read()

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(
                map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(
                sql.Placeholder() * len(nonPrimeAttributes))
            # Values follow the order of nonPrimeAttributes.
            cur.execute(
                strSQL1.format(sql.Identifier(self.tableName), strSQL2,
                               strSQL3),
                [documentTitle, contextID, documentContent, documentPath])
        finally:
            cur.close()

        documentID = self.Table.selectColumn(
            "document_id", {"document_path": [documentPath]})

        # Phrase counts are only recorded when the phrase origin table exists.
        if phraseOrigin.Table.exists():
            textProcessor = TextProcessor(documentContent,
                                          config.PARSE['phraseMaxLength'])
            for phraseDict in textProcessor.phraseCount.values():
                for key, val in phraseDict.items():
                    phraseOrigin.addRecord(documentID[0], key, val, connectDB)

        print("add document end...", documentPath)

    def deleteRecord(self, documentPath, connectDB):
        """
        Deletes record from document table

        Args:
            documentPath: Value of ``document_path`` identifying the row.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.
        """
        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE document_path = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)),
                        [documentPath])
        finally:
            cur.close()
class InputText(object):
    """Schema description and record operations for the input_text table."""

    def __init__(self, tableName="input_text", copy=False):
        """Describe the input_text table and bind its ``Table`` handle.

        Args:
            tableName: Base name of the table; ``'_temp'`` is appended when
                ``copy`` is truthy.
            copy: Flag stored on the instance; selects the ``_temp`` name.
        """
        from contextionaryDatabase import Table

        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # Column name -> SQL type, in declaration order.
        self.defaultColumns = {"input_text_id": "serial", "input_text": "text"}
        self.defaultPrimaryKeys = ["input_text_id"]
        self.defaultUnique = ["input_text"]
        self.defaultForeignKeys = None
        self.getTriggerFunction = None
        self.Table = Table(self.tableName)

    @staticmethod
    def _buildPhraseIndex(words, maxLength):
        """Enumerate every n-gram of ``words`` up to ``maxLength`` words.

        IDs are assigned sequentially starting at 1, shortest phrases first
        and, within one length, by ascending 1-based start position.

        Returns:
            dict mapping phrase ID to ``((position, length), text, components)``
            where ``components`` lists the IDs of every sub-phrase contained
            in the phrase (the phrase itself included).
        """
        wordCount = len(words)
        # Never ask for phrases longer than the text: the unbounded loop
        # produced zero/negative slice bounds and a KeyError for short texts.
        longest = min(maxLength, wordCount)

        phraseIDs = {}
        nextID = 0
        for length in range(1, longest + 1):
            for position in range(1, wordCount - length + 2):
                nextID += 1
                phraseIDs[(position, length)] = nextID

        index = {}
        for (position, length), phraseID in phraseIDs.items():
            components = []
            for subLength in range(1, length + 1):
                # IDs of equal-length phrases are consecutive by position, so
                # the contained sub-phrases form one contiguous ID range.
                first = phraseIDs[(position, subLength)]
                components.extend(
                    range(first, first + length - subLength + 1))
            ngram = " ".join(words[position - 1:position + length - 1])
            index[phraseID] = ((position, length), ngram, components)
        return index

    def addRecord(self, text, connectDB):
        """
        Adds record to "input text" and "input text phrase count" and "input text word position"
        This process is analogous to the addRecord module in the Document class

        Args:
            text: The input text to store.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.
        """
        from InputTextPhraseCount import InputTextPhraseCount
        from InputTextWordPosition import InputTextWordPosition
        # Imported unconditionally: a local import inside only the first
        # branch left the name unbound for the second branch.
        from Document import TextProcessor
        from psycopg2 import sql

        # Every column except the primary key receives a value on insert.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(
                sql.Placeholder() * len(nonPrimeAttributes))
            cur.execute(
                strSQL1.format(sql.Identifier(self.tableName), strSQL2, strSQL3),
                [text])
        finally:
            cur.close()

        inputTextID = self.Table.selectColumn("input_text_id",
                                              {"input_text": [text]})

        # Phrase counts are only recorded when their table exists.
        inputTextPhraseCount = InputTextPhraseCount()
        if inputTextPhraseCount.Table.exists():
            textProcessor = TextProcessor(text, config.PARSE['phraseMaxLength'])
            for phraseDict in textProcessor.phraseCount.values():
                for key, val in phraseDict.items():
                    inputTextPhraseCount.addRecord(inputTextID[0], key, val,
                                                   connectDB)

        # Word positions are only recorded when their table exists.
        inputTextWordPosition = InputTextWordPosition()
        if inputTextWordPosition.Table.exists():
            maxPL = config.PARSE['phraseMaxLength']
            words = TextProcessor(text, maxPL).getWordOrderedList()
            phraseIndex = self._buildPhraseIndex(words, maxPL)
            for inputTextPhraseID, record in phraseIndex.items():
                (phrasePosition, phraseLength), phraseText, phraseComponents = record
                inputTextWordPosition.addRecord(
                    inputTextID[0], inputTextPhraseID, phraseText,
                    phrasePosition, phraseLength, phraseComponents, connectDB)

    def deleteRecord(self, text, connectDB):
        """
        Deletes record from input text

        Args:
            text: Value of ``input_text`` identifying the row.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.
        """
        # addRecord only imports ``sql`` locally, so import it here as well
        # rather than depending on a module-level import.
        from psycopg2 import sql

        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE input_text = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)), [text])
        finally:
            cur.close()
class Context(object):
    """Schema description and record operations for the context table."""

    def __init__(self, tableName="context", copy=True):
        """Describe the context table and bind its ``Table`` handle.

        Args:
            tableName: Base name of the table; ``'_temp'`` is appended when
                ``copy`` is truthy.
            copy: Flag stored on the instance; selects the ``_temp`` name.
        """
        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # dictionary of default columns {column name: data type string}
        # addRecord relies on this order when it lines up the INSERT values.
        self.defaultColumns = {"context_id": "serial",
                               "context_name": "varchar(255)",
                               "parent_id": "bigint NULL",
                               "context_children_id": "bigint[] NULL",
                               "context_picture": "varchar(255)",
                               "directory_level": "bigint",
                               "context_path": "text"}

        # primary key column names
        self.defaultPrimaryKeys = ["context_id"]

        # natural key column names (the primary key is a surrogate)
        self.defaultUnique = ["context_path"]

        # foreign keys {column name: (reference table name, reference column name)}
        self.defaultForeignKeys = {"parent_id": (self.tableName, "context_id")}

        # no trigger function for this table
        self.getTriggerFunction = None

        # Table is expected to be available at module scope.
        self.Table = Table(self.tableName)

    def addRecord(self, contextPath, connectDB):
        """
        Adds record to context table

        Args:
            contextPath: Directory path of the context; must contain at least
                two path components.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.

        Raises:
            ValueError: If the parent directory is not the configured root
                and has no row in the context table.
        """
        # Every column except the primary key receives a value on insert.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        # Paths are split on "/" on Linux and on "\" everywhere else.
        separator = "/" if 'Linux' in platform.platform() else "\\"
        dirpath_split = contextPath.split(separator)
        contextName = dirpath_split[-1]
        directoryLevel = len(dirpath_split) - 1
        parent = dirpath_split[-2]
        parentPath = separator.join(dirpath_split[0:-1])

        # Both are filled in after the insert (see the end of this method).
        contextChildrenID = None
        contextPicture = None

        root = config.PARSE['rootDirectory']
        if parent == root:
            # Direct children of the root directory have no parent context.
            parentID = None
        else:
            cur = connectDB.connection.cursor()
            try:
                strSQL = sql.SQL("""SELECT context_id FROM {} WHERE context_path = %s""")
                cur.execute(strSQL.format(sql.Identifier(self.tableName)),
                            [parentPath])
                parentRow = cur.fetchone()
            finally:
                cur.close()
            if parentRow is None:
                # Previously surfaced as an opaque "NoneType is not
                # subscriptable" TypeError.
                raise ValueError(
                    "no context record found for parent path %r" % (parentPath,))
            parentID = parentRow[0]

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(
                sql.Placeholder() * len(nonPrimeAttributes))
            # Values follow the order of nonPrimeAttributes.
            cur.execute(
                strSQL1.format(sql.Identifier(self.tableName), strSQL2, strSQL3),
                [contextName, parentID, contextChildrenID, contextPicture,
                 directoryLevel, contextPath])
        finally:
            cur.close()

        # update context children ID in --context-- table
        self.generateContextChildrenID(contextPath, connectDB)

        # update context picture in --context-- table
        contextPicture = self.generateContextPicture(contextPath)
        self.updateContextProperty(contextPath,
                                   {"context_picture": contextPicture},
                                   connectDB)

    def deleteRecord(self, contextPath, connectDB):
        """
        Deletes record from context table

        Args:
            contextPath: Value of ``context_path`` identifying the row.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.
        """
        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE context_path = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)),
                        [contextPath])
        finally:
            cur.close()

    def updateContextProperty(self, contextPath, setClause, connectDB):
        """
        Updates record from context table

        Args:
            contextPath: Value of ``context_path`` identifying the row.
            setClause: Mapping of column name to new value; one UPDATE is
                issued per entry.
            connectDB: Object exposing a ``connection`` attribute with a
                ``cursor()`` method.
        """
        for key, val in setClause.items():
            if key == "parent_id":
                # NOTE(review): behaviour kept exactly as found. This branch
                # rewrites the target column to "context_id" and replaces
                # contextPath with the *list* returned by selectColumn, which
                # is then used as the WHERE parameter and stays in effect for
                # any later entries of setClause. Confirm the intent before
                # relying on it.
                cipid = self.Table.selectColumn("parent_id",
                                                {"context_path": [contextPath]})
                key = "context_id"
                contextPath = self.Table.selectColumn("context_path",
                                                      {key: [cipid[0]]})

            cur = connectDB.connection.cursor()
            try:
                strSQL = sql.SQL("""UPDATE {} SET {} = %s WHERE context_path = %s""")
                cur.execute(
                    strSQL.format(sql.Identifier(self.tableName),
                                  sql.Identifier(key)),
                    (val, contextPath))
            finally:
                cur.close()

    def generateContextChildrenID(self, contextPath, connectDB):
        """
        Appends this context's ID to its parent's context_children_id

        Nothing is done when the context has no (truthy) parent ID.

        Args:
            contextPath: Value of ``context_path`` identifying the child row.
            connectDB: Passed through to ``updateContextProperty``.
        """
        contextID = self.Table.selectColumn("context_id",
                                            {"context_path": [contextPath]})
        parentID = self.Table.selectColumn("parent_id",
                                           {"context_path": [contextPath]})
        if not parentID[0]:
            return

        contextChildrenID = self.Table.selectColumn(
            "context_children_id", {"context_id": [parentID[0]]})
        if contextChildrenID[0] is None:
            # First child registered for this parent.
            updatedChildren = [contextID[0]]
        else:
            contextChildrenID[0].extend(contextID)
            updatedChildren = contextChildrenID[0]

        parentContextPath = self.Table.selectColumn(
            "context_path", {"context_id": [parentID[0]]})
        self.updateContextProperty(parentContextPath[0],
                                   {"context_children_id": updatedChildren},
                                   connectDB)

    def generateContextPicture(self, contextPath):
        """
        Generates context picture for context table

        Returns:
            The string ``"<context_id>-<context_name>"`` for the row whose
            ``context_path`` equals ``contextPath``.
        """
        contextID = self.Table.selectColumn("context_id",
                                            {"context_path": [contextPath]})
        contextName = self.Table.selectColumn("context_name",
                                              {"context_path": [contextPath]})
        return "-".join([str(contextID[0]), contextName[0]])