Example #1
    def test_reparsing(self):
        result = Tokenizer.tokenize('+2+2')
        result = Parser(result).parse()
        expected = result
        result = str(result)
        result = Tokenizer.tokenize(result)
        result = Parser(result).parse()
        self.assertEqual(str(result), str(expected))
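
The property being exercised here: parse, render back to text with str(), and parse again; the two trees should render identically. The same invariant as a standalone helper (a hypothetical sketch; it assumes str() of a parse tree is valid input for Tokenizer.tokenize):

    def parses_stably(source):
        # Parse once, render back to text, then parse the rendering again.
        first = Parser(Tokenizer.tokenize(source)).parse()
        second = Parser(Tokenizer.tokenize(str(first))).parse()
        # Stable if both trees render to the same text.
        return str(first) == str(second)

    assert parses_stably('+2+2')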
Example #2
    def test_transformation_failure(self):

        start = Parser(Tokenizer.tokenize('x+y')).parse()
        end = start

        transformation = ExpressionSubstitution(start, end)

        instantiated_start = Parser(Tokenizer.tokenize('a + b')).parse()
        pattern = SubstitutionPattern({'x': 'xyz'})
        self.assertRaises(Exception, transformation.transform,
                          instantiated_start, pattern)
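
The same expectation reads more directly with assertRaises as a context manager, which also leaves no ambiguity about how the arguments reach transform:

    with self.assertRaises(Exception):
        transformation.transform(instantiated_start, pattern)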
Example #3
    def test_equation_cancellation_with_negative(self):

        lhs = Parser(Tokenizer.tokenize('x + -4')).parse()
        rhs = Parser(Tokenizer.tokenize('y')).parse()
        equation = Equation(lhs, rhs)

        addition_cancellation = EquationCancellation(OperationType.PLUS(),
                                                     OperationType.MINUS())

        self.assertTrue(addition_cancellation.is_applicable_to(equation))
        result = addition_cancellation.apply(equation)
        verify(str(result), self.reporter)
Example #4
    def test_equation_cancellation(self):

        lhs = Parser(Tokenizer.tokenize('x * 4')).parse()
        rhs = Parser(Tokenizer.tokenize('y')).parse()
        equation = Equation(lhs, rhs)

        multiplication_cancellation = EquationCancellation(
            OperationType.TIMES(), OperationType.DIVIDE())

        self.assertTrue(multiplication_cancellation.is_applicable_to(equation))
        result = multiplication_cancellation.apply(equation)
        verify(str(result), self.reporter)
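
Conceptually, the cancellation moves the trailing operand of the lhs across the equals sign under the inverse operation, so x * 4 = y becomes x = y / 4. A hypothetical sketch of apply (the attribute and class names below are invented for illustration, not the project's API):

    def apply(self, equation):
        # Detach the right operand of the lhs and re-attach it to the
        # rhs under the inverse operation: x * 4 = y  ->  x = y / 4.
        operand = equation.lhs.right
        new_rhs = BinaryOperation(self.inverse_operation, equation.rhs, operand)
        return Equation(equation.lhs.left, new_rhs)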
Example #5
    def test_equation_cancellation_is_applicable(self):

        lhs = Parser(Tokenizer.tokenize('x + 4')).parse()
        rhs = Parser(Tokenizer.tokenize('y')).parse()
        equation = Equation(lhs, rhs)

        addition_cancellation = EquationCancellation(OperationType.PLUS(),
                                                     OperationType.MINUS())

        self.assertTrue(addition_cancellation.is_applicable_to(equation))
        flipped = equation.flip()
        self.assertFalse(addition_cancellation.is_applicable_to(flipped))
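
Judging from this test, applicability hinges on the root operator of the left-hand side: 'x + 4 = y' matches a PLUS cancellation, while the flipped 'y = x + 4' does not. A hypothetical version of the check (attribute names invented for illustration):

    def is_applicable_to(self, equation):
        # Applicable only when the lhs is rooted in the operation
        # this cancellation undoes.
        return equation.lhs.operation == self.operation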
Example #6
    def test_identity_transformation(self):

        start = Parser(Tokenizer.tokenize('x')).parse()
        end = start

        transformation = ExpressionSubstitution(start, end)

        instantiated_start = Parser(Tokenizer.tokenize('abc')).parse()
        pattern = SubstitutionPattern({'x': 'abc'})
        verify(
            '{} -> {}'.format(
                'abc', transformation.transform(instantiated_start, pattern)),
            self.reporter)
Example #7
    def test_all_substitutions_same_variable(self):

        expression = Parser(Tokenizer.tokenize('x + x + x')).parse()
        start = Parser(Tokenizer.tokenize('a + a')).parse()
        end = Parser(Tokenizer.tokenize('2 * a')).parse()

        transformation = ExpressionSubstitution(start, end)

        transformations = transformation.get_all_substitutions(expression)

        to_return = list()
        for pattern, result in transformations:
            row = list()
            for key in sorted(pattern.keys()):
                row.append('{} : {}'.format(key, pattern[key]))
            to_return.append('{' + ', '.join(row) + '} => ' + str(result))

        verify('\n'.join(to_return), self.reporter)
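
The formatting loop above is behavior-equivalent to a comprehension; the following is just a more compact phrasing of the same code:

    rows = [
        '{' + ', '.join('{} : {}'.format(k, pattern[k])
                        for k in sorted(pattern.keys())) + '} => ' + str(result)
        for pattern, result in transformations
    ]
    verify('\n'.join(rows), self.reporter)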
Example #8
    def test_transformation_with_expression(self):

        start = Parser(Tokenizer.tokenize('x + y')).parse()
        end = Parser(Tokenizer.tokenize('y + x')).parse()

        transformation = ExpressionSubstitution(start, end)

        instantiated_start = Parser(Tokenizer.tokenize('1+(2+3+4)')).parse()
        pattern = SubstitutionPattern({
            'x': '1',
            'y': Parser(Tokenizer.tokenize('2+3+4')).parse()
        })
        verify(
            '{} -> {}'.format(
                str(instantiated_start),
                transformation.transform(instantiated_start, pattern)),
            self.reporter)
Example #9
    def test_complex_single_solution_solve(self):

        lhs = Parser(Tokenizer.tokenize('x * 4 - 18')).parse()
        rhs = Parser(Tokenizer.tokenize('2')).parse()
        equation = Equation(lhs, rhs)

        cancellations = [
            EquationCancellation(OperationType.PLUS(), OperationType.MINUS()),
            EquationCancellation(OperationType.MINUS(), OperationType.PLUS()),
            EquationCancellation(OperationType.TIMES(),
                                 OperationType.DIVIDE()),
            EquationCancellation(OperationType.DIVIDE(), OperationType.TIMES())
        ]

        transformations = [c.as_transformation() for c in cancellations]

        step = SolverStep(transformations)
        step.next_step = step

        condition = lambda x: str(x.lhs) == 'x'

        result = step.execute_until(equation, condition)
        verify(str(result), self.reporter)
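
Setting step.next_step = step makes the single step loop until the condition holds. One plausible reading of execute_until, inferred from this test alone (the helper name apply_one is hypothetical):

    def execute_until(self, equation, condition):
        # Apply a transformation, follow next_step (here, the step
        # itself), and stop once the predicate is satisfied.
        current, step = equation, self
        while not condition(current):
            current = step.apply_one(current)
            step = step.next_step
        return current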
Example #10
    def test_order_of_operations(self):
        result = Tokenizer.tokenize('+2+-2*4')
        result = Parser(result).parse()
        verify(str(result), self.reporter)
Example #11
    def test_binary_parsing(self):
        result = Tokenizer.tokenize('+2+2')
        result = Parser(result).parse()
        verify(str(result), self.reporter)
Example #12
    def test_complex_parens(self):
        result = Tokenizer.tokenize('(+(2))+(2)')
        result = Parser(result).parse()
        verify(str(result), self.reporter)
Example #13
    def test_redundant_parens(self):

        result = Tokenizer.tokenize('((x))')
        result = Parser(result).parse()
        verify(str(result), self.reporter)
Example #14
    def test_simple_parsing(self):

        result = Tokenizer.tokenize('-2')
        result = Parser(result).parse()
        verify(str(result), self.reporter)
Example #15
    def test_tokenizing(self):

        result = Tokenizer.tokenize('(34+2*x)/14+17-x^2')
        verify(str(result), self.reporter)
Example #16
import json

import emoji

# Tokenizer and TokenType are assumed to come from this project's tokenizer module.

# reading the cleaned tweet data
DATA_PATH = '../data/twitter-data-cleaned.txt'
with open(DATA_PATH, 'r', encoding="utf-8") as f:
    data = f.readlines()
data = [d.strip() for d in data if d.strip() != '']

# getting our chosen emojis
SELECTED_EMOJIS_PATH = '../data/best-emojis.json'
with open(SELECTED_EMOJIS_PATH, 'r') as f:
    EMOJIS = json.load(f)
EMOJI_CHARS = [e['char'] for e in EMOJIS]

# every emoji the 'emoji' package knows about (this relies on the pre-1.0
# emoji API, where UNICODE_EMOJI maps emoji characters to shortcodes)
ALL_EMOJIS = set(emoji.emojize(emoji_code) for emoji_code in emoji.UNICODE_EMOJI.values())

# preprocessing the data

tokenizer = Tokenizer(EMOJI_CHARS)
# collect the words closest to each emoji occurrence as its context
# (the loop below takes the 5 closest words, via findClosestNWords)
context = {e: [] for e in EMOJI_CHARS}
emojiToId = {e: i for i, e in enumerate(EMOJI_CHARS)}

for tweet in data:
    tokens = tokenizer.tokenize(tweet)
    for i, token in enumerate(tokens):
        if token.token_type == TokenType.EMOJIS:
            closest = tokenizer.findClosestNWords(5, tokens, i)
            if closest:
                context[token.raw].append(closest)
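
# Sanity check: each selected emoji should now map to a list of nearby-word
# windows. (Illustrative peek only; assumes at least one window was collected
# for the first emoji in EMOJI_CHARS.)
sample = EMOJI_CHARS[0]
print(sample, len(context[sample]), context[sample][:1])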

emojiBestWords = []
emojiWorstWords = []
for i in range(len(EMOJI_CHARS)):