def preprocess(self, text: str) -> List[str]:
    # Normalize the raw text, then tokenize, drop stopwords, and lemmatize.
    text = txt.expand_contractions(
        txt.strip_additions(
            txt.resurrect_expletives(text)))
    lemmas = tkn.lemmatize(
        tkn.drop_stopwords(
            tkn.tokenize(text)))
    return lemmas
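For context, a minimal stand-in for the same pipeline using NLTK. The `txt`/`tkn` helper modules above are project-specific, so this sketch only assumes standard NLTK calls and may normalize differently:

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# requires: nltk.download('punkt'); nltk.download('stopwords'); nltk.download('wordnet')
def preprocess_sketch(text):
    # tokenize, drop stopwords and non-alphabetic tokens, then lemmatize
    words = nltk.word_tokenize(text.lower())
    stop = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(w) for w in words if w.isalpha() and w not in stop]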
def calculate_chi_square(self, term, classs):
    # Counts for the 2x2 term/class contingency table.
    N = Tokens.get_count_email()
    A = Tokens.get_count_term_on_class(term, classs)
    B = Tokens.get_count_term_not_on_class(term, classs)
    C = Tokens.get_count_not_term_on_class(term, classs)
    D = Tokens.get_count_not_term_not_on_class(term, classs)
    num = N * (A * D - B * C) * (A * D - B * C)
    denom = (A + C) * (B + D) * (A + B) * (C + D)
    chi_square = num / denom
    return chi_square
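The statistic above is the standard 2x2 contingency chi-square used for term/class feature selection: with A, B, C, D as the four term-presence/class-membership counts and N the total number of emails,

    chi^2 = N * (A*D - B*C)^2 / ((A + C) * (B + D) * (A + B) * (C + D))

A high value indicates the term's presence is strongly dependent on the class.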
def __init__(self):
    """ Initialize class variables including an instance of the Tokens class """
    self.obj_list = list()
    self.code_reg = Tokens.CodeRegex()
    self.current_file_name = ""
    self.current_line_num = 0
def addToken(self, tokenType, literal=None, indent=""):
    # if we haven't been given an indentation, then fetch it from memory
    if not indent:
        indent = self.indent
    # take the text from the source code (characters between the start and current position)
    text = self.source[self.start:self.current]
    # create and return a token
    return Tokens.Token(tokenType, text, literal, self.line, self.char, indent)
def ExpressionHelper(toks, space=Whitespace):
    toks = [toks[0]] + [(t | _next_) for t in toks]
    for i in range(1, len(toks)):
        toks[i] %= dict(next=toks[i - 1], this=toks[i], top=toks[-1])
    if space:
        for i, tok in enumerate(toks):
            if not isinstance(tok, Oper):
                toks[i] = Tokens._pad(_sp_, tok)
        toks[-1] %= {_sp_: space}
    return toks[-1]
def test_shift(self):
    tokens = Tokens.Classic('I say, "Hi!"')
    self.assertEqual("I", tokens.current().show())
    tokens.shift()
    self.assertEqual("say", tokens.current().show())
    tokens.shift()
    self.assertEqual("Hi", tokens.current().show())
    # shifting past the last token should raise
    with self.assertRaises(Exception):
        tokens.shift()
def insertToken(self, ttype, lexeme="", literal=None, line=0, char=0, indent=0):
    if isinstance(ttype, str):
        self.tokens.insert(
            self.current,
            Tokens.Token(ttype, lexeme, literal, line, char, indent))
    else:
        self.tokens.insert(self.current, ttype)
def test_text(self):
    tokens = Tokens.Classic('I say, "Hi!"')
    self.assertEqual('_ ___, "__!"', tokens.text())
    tokens.shift()
    self.assertEqual('I ___, "__!"', tokens.text())
    tokens.shift()
    self.assertEqual('I say, "__!"', tokens.text())
    tokens.shift()
    self.assertEqual('I say, "Hi!"', tokens.text())
    # shifting past the last token should raise
    with self.assertRaises(Exception):
        tokens.shift()
def scanTokens(self):
    # empty token list
    self.tokens = []
    # while we are not at the end of the list, loop
    while not self.atEnd():
        # update the position of our lexer
        self.char += (self.current - self.start)
        self.start = self.current
        # fetch the next token
        token = self.tokenFromChar()
        # append the token to our list of tokens, provided we received one
        if token:
            # if the last token was an ending
            if self.checkPreviousToken("End"):
                # fetch it
                lt = self.previousToken()
                # and check we haven't skipped an indentation
                if token.indent < lt.indent - 1:
                    # create a new ending token at the same position, but with one lower indentation
                    self.tokens.append(
                        Tokens.Token("End", "", None, lt.line, lt.char, lt.indent - 1))
            # add the token
            self.tokens.append(token)
    # add an end token if none exists
    if not self.checkPreviousToken("End"):
        self.tokens.append(
            Tokens.Token("End", "", None, self.line, self.char + 1, self.indent))
    # add an EOF token
    self.tokens.append(
        Tokens.Token("EOF", "", None, self.line, self.char + 1, 0))
    # remove any leading end tokens
    while self.tokens[0].type == "End":
        self.tokens.pop(0)
    return self.tokens
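To illustrate the End-token bookkeeping above with a hypothetical trace: if the previous token closed a block at indent 2 and the next token arrives at indent 0, the check `token.indent < lt.indent - 1` fires and a synthetic End at indent 1 is appended first, so the skipped level is closed before the new token is added.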
class Analizador:
    # Document to analyze
    __programa = ""
    # Tokens object that receives the tokens of my language
    __token = Tokens()
    # Lexer object
    __lexer = Lexer()

    def __init__(self, document):
        self.__programa = document

    def analizar(self):
        if self.__programa == 'codigo.ap':
            with open(self.__programa, encoding='UTF-8') as f:
                codAnalizar = f.read()
            no_Validos = self.__lexer.validar(codAnalizar, self.__token.getTokens(), True)
            validos = self.__lexer.validar(codAnalizar, self.__token.getTokens(), False)
            printProgressBar(0, len(validos), prefix='Progress:', suffix='Complete', length=70)
            i = 0
            for valido in validos:
                if valido['token'] != '\n':
                    os.system('clear')
                    printProgressBar(i + 1, len(validos), prefix='Progress:', suffix='Complete', length=70)
                    print('\n')
                    i += 1
                    print('[', valido['token'], ']', 'is part of the language; it is a:', valido['tipo'])
            if no_Validos:
                for invalido in no_Validos:
                    print("Error on line", invalido['linea'], " [", invalido['palabra'], "]")
            else:
                print("No lexical errors were found")
        else:
            print("Error opening the file")
def oper(symbol, operation=None, ops=BINARY, pos=CENTER):
    # a bare string becomes an omitted raw-symbol token; anything else is used as-is
    if isinstance(symbol, str):
        symtok = Omit(Raw(symbol))
    else:
        symtok = symbol
    if ops == BINARY:
        if pos == LEFT:
            tok = Oper([symtok, _next_, _this_])
        elif pos == CENTER:
            tok = Oper([_next_, symtok, _this_])
        elif pos == RIGHT:
            tok = Oper([_next_, _this_, symtok])
    elif ops == UNARY:
        if pos in (LEFT, CENTER):
            tok = Oper([symtok, _next_])
        elif pos == RIGHT:
            tok = Oper([_next_, symtok])
    tok = Tokens._pad(_sp_, tok)
    if operation:
        tok.callback = _funcmap(operation)
    tok.name = '<Oper %s>' % symbol
    return tok
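Hypothetical usage, assuming the surrounding combinator DSL (`Oper`, `Raw`, `Omit`, and the `BINARY`/`UNARY`/`LEFT`/`CENTER`/`RIGHT` constants) behaves as its names suggest:

add = oper('+', operation=lambda a, b: a + b)                  # binary infix (the defaults)
neg = oper('-', operation=lambda a: -a, ops=UNARY, pos=LEFT)   # unary prefix
fact = oper('!', ops=UNARY, pos=RIGHT)                         # unary postfix, no callback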
def voraxTokens(self, amt_of_scroll, amt_of_key, amt_of_pick, at_gem, at_heart, at_book):
    tokens = t.Tokens(amt_of_scroll, amt_of_key, amt_of_pick)
    self.at_gem = at_gem
    self.at_heart = at_heart
    self.at_book = at_book
    if (tokens.state_of_gem == tokens.broken_gem
            and tokens.state_of_heart == tokens.broken_heart
            and tokens.state_of_book == tokens.broken_book):
        tokens.state_of_eidolon = tokens.can_get_eidolon
    else:
        if at_gem and tokens.canBreakGem():
            tokens.state_of_gem = tokens.broken_gem
        else:
            tokens.state_of_gem = tokens.gem
        if at_heart and tokens.canBreakHeart():
            tokens.state_of_heart = tokens.broken_heart
        else:
            tokens.state_of_heart = tokens.heart
        if at_book and tokens.canBreakBook():
            tokens.state_of_book = tokens.broken_book
        else:
            tokens.state_of_book = tokens.book
HINT_KEY = pygame.K_SLASH

directory = 'books'
book = 'John'
# book = '3_John'
# book = 'Philemon'
filename = book + '.txt'
path = os.path.join(directory, filename)

reader = Reader.File(path)
lines = reader.lines()
parser = Parser.Simple(lines)
verses = parser.parse(max_width=max_chars)
for verse in verses:
    text = verse.text()
    tokens = Tokens.Classic(text)
    tokenized = tokens.tokenize()
    sample = Sample.Classic(tokenized)
    lines = wrapper.wrap(sample.text())
    section = verse.section()
    lines.insert(0, section)
    reference = verse.reference()
    lines.insert(1, reference)
    redraw(lines, screen)
    while sample.guessable():
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            if event.type == pygame.KEYDOWN:
def tokenize(filename):
    token_grammar = Tokens.fullGrammar()
    tokenizer = Tokenizer.Tokenizer(token_grammar)
    return tokenizer.tokenize(filename)
def __init__(self, codeString):
    self.tokenList = [token for token in tokenize(codeString)]
    # Insert explicit EOF token
    self.tokenList.append(Tokens.EOF())
    self.index = 0
    self.fastForward()
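A note on the design choice: appending an explicit `Tokens.EOF()` sentinel means downstream lookahead never has to bounds-check. A hypothetical `peek` in the same class could simply be:

def peek(self):
    return self.tokenList[self.index]  # always valid: the stream ends in Tokens.EOF()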
def test_current(self):
    tokens = Tokens.Classic('I say, "Hi!"')
    self.assertEqual('I', tokens.current().show())