def getVersion(line):
    """Return the text that follows the first "version" in the normalized line.

    The line is normalized with ``hf.completeStripAndLower`` before the
    search. Returns ``False`` when the normalized text does not contain
    "version".
    """
    normalized = hf.completeStripAndLower(line)
    # partition() splits around the first occurrence of the keyword; an empty
    # separator means the keyword was not present at all.
    _before, keyword, remainder = normalized.partition("version")
    if not keyword:
        return False
    return remainder
def findLineFromStart(theFile, lookingFor):
    """Rewind ``theFile`` and return the index of the first matching line.

    Every line is normalized with ``hf.completeStripAndLower`` and compared
    to ``lookingFor``. The returned index is 0-based (it comes straight from
    ``enumerate``). Returns ``None`` when no line matches.

    Possible upgrade: remember the character offset instead so ``seek`` can
    jump straight to the spot. That is just as fragile as a line number if
    the file is edited in the meantime.
    """
    theFile.seek(0, 0)
    matchingIndexes = (
        index
        for index, rawLine in enumerate(theFile)
        if hf.completeStripAndLower(rawLine) == lookingFor
    )
    # next() stops reading at the first hit, exactly like an early return
    # from a for-loop would.
    return next(matchingIndexes, None)
def getToInfoStart(f, timesRun=0):
    """Advance the open file ``f`` to just past its "infostart" marker line.

    Reads ``self.infoStartLine`` lines from the current position of ``f``
    and then one more. If that last line normalizes (via
    ``hf.completeStripAndLower``) to "infostart", the function returns with
    ``f`` positioned right after the marker.

    Otherwise it calls ``self.update(True)``, rewinds ``f`` and retries by
    calling itself with ``timesRun + 1``. When a miss happens with
    ``timesRun > 1`` it prints an error and gives up.

    Always returns ``None``.

    NOTE(review): ``self`` is not a parameter; it has to come from an
    enclosing scope. The first attempt does not rewind ``f``, so the caller
    presumably hands in a file positioned at its start - confirm.
    """
    # Skip everything in front of the line the marker is expected on.
    for _ in range(self.infoStartLine):
        f.readline()
    # range(x) stops at x-1, so one more read lands on the marker itself.
    markerLine = f.readline()

    if hf.completeStripAndLower(markerLine) == "infostart":
        return  # found it and read past it

    if timesRun > 1:
        # Already retried; stop instead of looping forever.
        print(
            "Error: FileFinderHelper0p0: The infoStartLine keeps on being changed on me! I don't know how to function! Returning None."
        )
        return None

    # The stored start line was wrong: refresh it, rewind and try again.
    # NOTE(review): update is called twice here, while the sibling
    # getToStartInfo calls it once - confirm whether the repeat is intended.
    self.update(True)
    self.update(True)
    f.seek(0, 0)
    getToInfoStart(f, timesRun + 1)
    return
def addPhrases(self, dT, wordIndex, wordsToKnow, possPhrases, phraseEnds,
               nextWords):
    """Append every phrase starting with the indexed word to the phrase lists.

    ``wordsToKnow[wordIndex]`` is unpacked into ``(word, phraseNums)``. For
    each phrase returned by ``dT.getPSW(word)`` ("get phrases starting
    with") the three parallel lists are extended in place:

    * ``possPhrases`` receives the phrase itself,
    * ``phraseEnds`` receives the phrase with its first word removed,
    * ``nextWords`` receives the first word of that remainder,

    and ``phraseNums`` receives the index the phrase got in ``possPhrases``.

    Returns the tuple ``(possPhrases, phraseEnds, nextWords)`` (the same
    list objects that were passed in).
    """

    def splitOffFirstWord(phrase):
        # Drop the first word (one match of self.nextWordRegEx) and report
        # the next word together with the shortened phrase.
        # NOTE(review): raises AttributeError if the regex does not match
        # the remainder (re.match returns None) - confirm that cannot happen.
        remainder = re.sub(self.nextWordRegEx, "", phrase, count=1)
        following = re.match(self.nextWordRegEx, remainder).group(0).strip()
        return following, remainder

    word, phraseNums = wordsToKnow[wordIndex]

    for phrase in dT.getPSW(word):
        # Sanity check: the three lists are meant to stay in lock-step.
        sizesAgree = HF.allEqual(len(possPhrases), len(phraseEnds),
                                 len(nextWords))
        if not sizesAgree:
            print(
                "addPhrases: ERROR! The phrase lists are not all the same size!"
            )

        possPhrases.append(phrase)
        topIndex = len(possPhrases) - 1

        # Debug print: flags an unexpectedly long phrase list.
        if topIndex > 13:
            printOut(
                "addPhrases: CAUGHT SOMETHING FISHY! number (top index of possPhrases) then possPhrases",
                topIndex, possPhrases)

        phraseNums.append(topIndex)
        nextWord, remainingPhrase = splitOffFirstWord(phrase)
        phraseEnds.append(remainingPhrase)
        nextWords.append(nextWord)

    # TODO: delete this debug print. Something between here and
    # getPhraseMatchOrder appears to make possPhrases lose elements.
    printOut("This is the possPhrases that I'm giving back", possPhrases,
             "Top index:",
             len(possPhrases) - 1)
    return possPhrases, phraseEnds, nextWords
def getToStartInfo(f, timesRun=0):
    """Advance the open file ``f`` to just past its "startinfo" marker line.

    Reads ``self.startInfoLine`` lines from the current position of ``f``
    and then one more. If that last line normalizes (via
    ``hf.completeStripAndLower``) to "startinfo", the function returns with
    ``f`` positioned right after the marker.

    Otherwise it calls ``self.update(True)``, rewinds ``f`` and retries by
    calling itself with ``timesRun + 1``. When a miss happens with
    ``timesRun > 1`` it prints an error and gives up.

    Always returns ``None``.

    NOTE(review): ``self`` is not a parameter; it has to come from an
    enclosing scope. The first attempt does not rewind ``f``, so the caller
    presumably hands in a file positioned at its start - confirm.
    """
    # Skip everything in front of the line the marker is expected on.
    for _ in range(self.startInfoLine):
        f.readline()
    # range(x) stops at x-1, so one more read lands on the marker itself.
    markerLine = f.readline()

    if hf.completeStripAndLower(markerLine) == "startinfo":
        return  # found it and read past it

    if timesRun > 1:
        # Already retried; stop instead of looping forever.
        print(
            "Error: FileFinderHelper0p0: The startInfoLine keeps on being changed on me! I don't know how to function! Returning None."
        )
        return None

    # The stored start line was wrong: refresh it, rewind and try again.
    self.update(True)
    f.seek(0, 0)
    getToStartInfo(f, timesRun + 1)
    return
def run(unopenedFile):
    """Read section-symbol definitions from the file at ``unopenedFile``.

    The first line (the version line) is skipped, as are blank lines. Every
    other line is stripped and lowercased and is expected to look like

        (section name) symbol:(symbol before)(section name)(symbol after)

    for example "Item name symbol: ### Item name ###". The section name is
    only a human-readable placeholder; the symbols before and after it are
    what get collected. There may be no space between the word "symbol" and
    the colon (that keeps the parsing regex-free).

    Returns a flat list ``[name, before, after, name, before, after, ...]``.
    Reading stops at a "formatend" line, at the first line that lacks
    " symbol:" (with a warning), or at end of file (with an error message).
    A line whose symbols cannot be extracted is skipped with a warning.
    """
    # Separator expected between the section name and its symbols.
    BREAKER = " symbol:"
    ans = []

    with open(unopenedFile) as file:
        for lineNum, rawLine in enumerate(file):
            if lineNum == 0:
                continue  # the version line carries no symbols

            line = rawLine.strip().lower()  # case is irrelevant here
            if not line:
                continue  # nothing but whitespace

            if hf.completeStrip(line) == "formatend":
                collected = len(ans)
                if collected < 6:
                    # Fewer than two complete definitions were supplied.
                    print(
                        "Warning! SectionSymbolFormat0p0Loader: Did not recieve name and item symbols correctly. Returning array as is."
                    )
                if collected % 3 != 0:
                    # Entries always come in triples, so this is corrupt.
                    print(
                        "Error! SectionSymbolFormat0p0Loader: Array has incorrect amount of items (incorrect format)! Returning it as is."
                    )
                return ans

            sectionName = hf.getSubstringInBetween(line, False, BREAKER)
            if sectionName is None:
                print(
                    "Warning! SectionSymbolFormat0p0Loader: Did not find the correct format at line {}. Returning the symbols I got."
                    .format(lineNum + 1))
                return ans

            leftSymbol = hf.getSubstringInBetween(line, BREAKER, sectionName)
            rightSymbol = hf.getSubstringInBetween(line, sectionName, False,
                                                   line.find(BREAKER))
            if leftSymbol is None or rightSymbol is None:
                print(
                    "Warning! SectionSymbolFormat0p0Loader: Suspected bad format at line {}. I skipped that line."
                    .format(lineNum + 1))
                continue

            ans.extend((sectionName, leftSymbol, rightSymbol))

    print(
        "Error! SectionSymbolFormat0p0Loader: encountered EOF before format end! Returning the symbols I got."
    )
    return ans
def __init__(self, formatPage):
    """Read the section / item-name / item symbol formats from ``formatPage``.

    The format page is scanned line by line for lines that *start* with one
    of the indicators ("Section Symbol:", "Item Name Symbol:",
    "Item Symbol:"). Such a line is expected to look like

        <indicator><before symbol><replacement word><after symbol>

    e.g. "Section Symbol:## Section ##" with replacement word "Section".
    The text before and after the replacement word is stored as a
    ``[before, after]`` pair. Scanning stops as soon as all three pairs are
    found; if the file ends first a warning is printed and the missing
    pairs stay ``[None, None]``.

    The three pairs are then handed to ``hf.addNameAtts`` together with the
    names "sectionFormat", "itemNameFormat" and "itemFormat" (presumably it
    sets them as attributes on ``self`` - confirm against the helper).

    Parameters:
        formatPage: path of the format page; passed to ``open``.
    """
    self.SECTION_SYMBOL_INDICATOR = "Section Symbol:"
    self.ITEM_NAME_SYMBOL_INDICATOR = "Item Name Symbol:"
    self.ITEM_SYMBOL_INDICATOR = "Item Symbol:"
    self.SECTION_REPLACEMENT = "Section"
    self.ITEM_NAME_REPLACEMENT = "Item Name"
    self.ITEM_REPLACEMENT = "Item"
    # If true, everything is lowercased before it is evaluated.
    # NOTE(review): the name reads like the opposite of what the flag does
    # (it is used as "should lower") - confirm the intended meaning.
    self.DOES_CASE_MATTER = False

    def findInfo(signal, insideSymbol, line):
        """Return (before, after) symbols for ``signal`` in ``line``, or False.

        False means the line carries no usable info: it does not start with
        ``signal``, or ``insideSymbol`` does not occur after the signal.
        """
        if not line.startswith(signal):
            return False
        pastSignal = len(signal)  # first index just past the signal
        insideSymWhere = line.find(insideSymbol, pastSignal)
        if insideSymWhere < 0:
            # Without this guard the -1 from find() would be used as a
            # slice index and produce garbage symbols.
            return False
        return (line[pastSignal:insideSymWhere],
                line[insideSymWhere + len(insideSymbol):])

    def getFormatInfo():
        """Scan the format page and return the three [before, after] pairs."""
        shouldLower = self.DOES_CASE_MATTER
        # What to look for: [indicator, replacement word] per wanted format.
        wanted = [
            [self.SECTION_SYMBOL_INDICATOR, self.SECTION_REPLACEMENT],
            [self.ITEM_NAME_SYMBOL_INDICATOR, self.ITEM_NAME_REPLACEMENT],
            [self.ITEM_SYMBOL_INDICATOR, self.ITEM_REPLACEMENT],
        ]
        if shouldLower:
            wanted = [[signal.lower(), inside.lower()]
                      for signal, inside in wanted]

        # One [before, after] slot per wanted format, filled in as found.
        ans = [[None, None] for _ in wanted]

        with open(self.formatPage) as f:
            for line in f:
                # Drop only the line terminator. Slicing off the last
                # character unconditionally would eat a real character on a
                # final line that has no trailing newline.
                line = line.rstrip("\n")
                if shouldLower:
                    line = line.lower()

                for pairNum, (signal, insideReplacer) in enumerate(wanted):
                    info = findInfo(signal, insideReplacer, line)
                    if info:
                        ans[pairNum][0] = info[0]
                        ans[pairNum][1] = info[1]

                if all(pair != [None, None] for pair in ans):
                    print("ans is filled! Here's ans: {}".format(ans))
                    return ans

        print(
            "SymbolPageReader:getFormatInfo(): Warning! Not all info found! I'm returning this as the symbols: {}"
            .format(ans))
        return ans

    self.formatInfo = []
    self.formatPage = formatPage
    # [[beforeSectionSymbol, afterSectionSymbol],
    #  [beforeNameSym, afterNameSym], [beforeItemSym, afterItemSym]]
    formatInfo = getFormatInfo()
    wantedInfo = ["sectionFormat", "itemNameFormat", "itemFormat"]
    hf.addNameAtts(self, wantedInfo, formatInfo)