Example #1
0
 def preprocess(self, text: str) -> List[str]:
     text = txt.expand_contractions(
            txt.strip_additions(
            txt.resurrect_expletives(text)))
     lemmas = tkn.lemmatize(
              tkn.drop_stopwords(
              tkn.tokenize(text)))
     return lemmas
Example #2
0
 def calculate_chi_square(self, term, classs):
     N = Tokens.get_count_email()
     a = Tokens.get_count_term_on_class(term, classs)
     b = Tokens.get_count_term_not_on_class(term, classs)
     c = Tokens.get_count_not_term_on_class(term, classs)
     d = Tokens.get_count_not_term_not_on_class(term, classs)

     #chi-square statistic: N * (a*d - b*c)^2 / ((a + c) * (b + d) * (a + b) * (c + d))
     numerator = N * (a * d - b * c) * (a * d - b * c)
     denominator = (a + c) * (b + d) * (a + b) * (c + d)

     chi_square = numerator / denominator

     return chi_square
Example #3
0
 def __init__(self):
     """
     Initialize class variables including an instance of the Tokens class
     """
     self.obj_list = list()
     self.code_reg = Tokens.CodeRegex()
     self.current_file_name = ""
     self.current_line_num = 0
Example #4
0
 def addToken(self, tokenType, literal=None, indent=""):
     #if we haven't been given an indentation, then fetch it from memory
     if not indent:
         indent = self.indent
     #extract the text from the source code (characters between the start and current positions)
     text = self.source[self.start:self.current]
     #create and return a token
     return Tokens.Token(tokenType, text, literal, self.line, self.char,
                         indent)
Example #5
0
def ExpressionHelper(toks, space=Whitespace):
    toks = [toks[0]] + [(t | _next_) for t in toks]
    for i in range(1, len(toks)):
        toks[i] %= dict(next=toks[i - 1], this=toks[i], top=toks[-1])
    if space:
        for i, tok in enumerate(toks):
            if not isinstance(tok, Oper):
                toks[i] = Tokens._pad(_sp_, tok)
    toks[-1] %= {_sp_: space}
    return toks[-1]
Example #6
0
def ExpressionHelper(toks, space=Whitespace):
    toks = [toks[0]] + [(t|_next_) for t in toks]
    for i in range(1, len(toks)):
        toks[i] %= dict(next=toks[i-1], this=toks[i], top=toks[-1])
    if space:
        for i, tok in enumerate(toks):
            if not isinstance(tok, Oper):
                toks[i] = Tokens._pad(_sp_, tok)
    toks[-1] %= {_sp_: space}
    return toks[-1]
Example #7
0
 def test_shift(self):
     tokens = Tokens.Classic('I say, "Hi!"')
     self.assertEqual("I", tokens.current().show())
     tokens.shift()
     self.assertEqual("say", tokens.current().show())
     tokens.shift()
     self.assertEqual("Hi", tokens.current().show())
     try:
         tokens.shift()
         self.fail()
     except Exception:
         pass
Example #8
0
 def insertToken(self,
                 ttype,
                 lexeme="",
                 literal=None,
                 line=0,
                 char=0,
                 indent=0):
     if isinstance(ttype, str):
         #given a token type string, build a new Token and insert it at the current position
         self.tokens.insert(
             self.current,
             Tokens.Token(ttype, lexeme, literal, line, char, indent))
     else:
         #ttype is already a Token, so insert it directly
         self.tokens.insert(self.current, ttype)
Example #9
0
 def test_text(self):
     tokens = Tokens.Classic('I say, "Hi!"')
     self.assertEqual('_ ___, "__!"', tokens.text())
     tokens.shift()
     self.assertEqual('I ___, "__!"', tokens.text())
     tokens.shift()
     self.assertEqual('I say, "__!"', tokens.text())
     tokens.shift()
     self.assertEqual('I say, "Hi!"', tokens.text())
     try:
         tokens.shift()
         self.fail()
     except Exception:
         pass
Example #10
0
 def scanTokens(self):
     #empty token list
     self.tokens = []
     #while we are not at the end of the source, loop
     while not self.atEnd():
         #update the position of our lexer
         self.char += (self.current - self.start)
         self.start = self.current
         #fetch the next token
         token = self.tokenFromChar()
         #append the token to our list of tokens, provided we received one
         if token:
             #if the last token was an ending
             if self.checkPreviousToken("End"):
                 #fetch it
                 lt = self.previousToken()
                 #and check we haven't skipped an indentation
                 if token.indent < lt.indent - 1:
                     #create a new ending token at the same position, but with one lower indentation
                     self.tokens.append(
                         Tokens.Token("End", "", None, lt.line, lt.char,
                                      lt.indent - 1))
             #add the token
             self.tokens.append(token)
     #add an end token if none exists
     if not self.checkPreviousToken("End"):
         self.tokens.append(
             Tokens.Token("End", "", None, self.line, self.char + 1,
                          self.indent))
     #add an EOF token
     self.tokens.append(
         Tokens.Token("EOF", "", None, self.line, self.char + 1, 0))
     #remove any leading end tokens
     while self.tokens[0].type == "End":
         self.tokens.pop(0)
     return self.tokens
Example #11
0
class Analizador:
    #Document to analyze
    __programa = ""
    #Tokens object used to receive the tokens of my language
    __token = Tokens()
    #Lexer object
    __lexer = Lexer()

    def __init__(self, document):
        self.__programa = document

    def analizar(self):
        if self.__programa == 'codigo.ap':
            codAnalizar = open(self.__programa, encoding='UTF-8').read()
            no_Validos = self.__lexer.validar(codAnalizar,
                                              self.__token.getTokens(), True)
            validos = self.__lexer.validar(codAnalizar,
                                           self.__token.getTokens(), False)
            printProgressBar(0,
                             len(validos),
                             prefix='Progreso:',
                             suffix='Completo',
                             length=70)
            i = 0
            for valido in validos:
                if valido['token'] != '\n':
                    os.system('clear')
                    printProgressBar(i + 1,
                                     len(validos),
                                     prefix='Progreso:',
                                     suffix='Completo',
                                     length=70)
                    print('\n')
                    i += 1
                    print('[', valido['token'], ']',
                          'Hace parte del lenguaje, es un:', valido['tipo'])

            if no_Validos:
                for invalido in no_Validos:
                    print("Error en la linea", invalido['linea'], " [",
                          invalido['palabra'], "]")

            else:
                print("No se encontró ningún error léxico")
        else:
            print("Error en la apertura del archivo")
Example #12
0
def oper(symbol, operation=None, ops=BINARY, pos=CENTER):
    if isinstance(symbol, basestring):
        symtok = Omit(Raw(symbol))
    else: symtok = symbol
    if ops == BINARY:
        if pos == LEFT:
            tok = Oper([symtok, _next_, _this_])
        elif pos == CENTER:
            tok = Oper([_next_, symtok, _this_])
        elif pos == RIGHT:
            tok = Oper([_next_, _this_, symtok])
    elif ops == UNARY:
        if pos in (LEFT, CENTER):
            tok = Oper([symtok, _next_])
        elif pos == RIGHT:
            tok = Oper([_next_, symtok])
    tok = Tokens._pad(_sp_, tok)
    if operation: tok.callback = _funcmap(operation)
    tok.name = '<Oper %s>' % symbol
    return tok
Example #13
0
def oper(symbol, operation=None, ops=BINARY, pos=CENTER):
    if isinstance(symbol, basestring):
        symtok = Omit(Raw(symbol))
    else:
        symtok = symbol
    if ops == BINARY:
        if pos == LEFT:
            tok = Oper([symtok, _next_, _this_])
        elif pos == CENTER:
            tok = Oper([_next_, symtok, _this_])
        elif pos == RIGHT:
            tok = Oper([_next_, _this_, symtok])
    elif ops == UNARY:
        if pos in (LEFT, CENTER):
            tok = Oper([symtok, _next_])
        elif pos == RIGHT:
            tok = Oper([_next_, symtok])
    tok = Tokens._pad(_sp_, tok)
    if operation: tok.callback = _funcmap(operation)
    tok.name = '<Oper %s>' % symbol
    return tok
Example #14
0
 def voraxTokens(self, amt_of_scroll, amt_of_key, amt_of_pick, at_gem,
                 at_heart, at_book):
     tokens = t.Tokens(amt_of_scroll, amt_of_key, amt_of_pick)
     self.at_gem = at_gem
     self.at_heart = at_heart
     self.at_book = at_book
     if (tokens.state_of_gem == tokens.broken_gem
             and tokens.state_of_heart == tokens.broken_heart
             and tokens.state_of_book == tokens.broken_book):
         tokens.state_of_eidolon = tokens.can_get_eidolon
     else:
         if at_gem and tokens.canBreakGem():
             tokens.state_of_gem = tokens.broken_gem
         else:
             tokens.state_of_gem = tokens.gem
         if at_heart and tokens.canBreakHeart():
             tokens.state_of_heart = tokens.broken_heart
         else:
             tokens.state_of_heart = tokens.heart
         if at_book and tokens.canBreakBook():
             tokens.state_of_book = tokens.broken_book
         else:
             tokens.state_of_book = tokens.book
Example #15
0
HINT_KEY = pygame.K_SLASH

directory = 'books'
book = 'John'
#book = '3_John'
#book = 'Philemon'
filename = book + '.txt'
path = os.path.join(directory, filename)
reader = Reader.File(path)
lines = reader.lines()
parser = Parser.Simple(lines)
verses = parser.parse(max_width=max_chars)

for verse in verses:
    text = verse.text()
    tokens = Tokens.Classic(text)
    tokenized = tokens.tokenize()
    sample = Sample.Classic(tokenized)
    lines = wrapper.wrap(sample.text())
    section = verse.section()
    lines.insert(0, section)
    reference = verse.reference()
    lines.insert(1, reference)
    redraw(lines, screen)

    while sample.guessable():
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

            if event.type == pygame.KEYDOWN:
Example #16
0
def tokenize(filename):
    token_grammar = Tokens.fullGrammar()
    tokenizer = Tokenizer.Tokenizer(token_grammar)

    return tokenizer.tokenize(filename)
Example #17
0
 def __init__(self, codeString):
     self.tokenList = [token for token in tokenize(codeString)]
     # Insert explicit EOF token
     self.tokenList.append(Tokens.EOF())
     self.index = 0
     self.fastForward()
Example #18
0
 def test_current(self):
     tokens = Tokens.Classic('I say, "Hi!"')
     self.assertEqual('I', tokens.current().show())