def test_reparsing(self):
    # Parse an expression, stringify it, and parse the string again;
    # the round trip should yield an identical tree.
    result = Tokenizer.tokenize('+2+2')
    result = Parser(result).parse()
    expected = result
    result = str(result)
    result = Tokenizer.tokenize(result)
    result = Parser(result).parse()
    self.assertEqual(str(result), str(expected))
def test_transformation_failure(self):
    # The pattern binds 'x' but not 'y', so the substitution cannot be
    # applied and transform should raise.
    start = Parser(Tokenizer.tokenize('x+y')).parse()
    end = start
    transformation = ExpressionSubstitution(start, end)
    instantiated_start = Parser(Tokenizer.tokenize('a + b')).parse()
    pattern = SubstitutionPattern({'x': 'xyz'})
    self.assertRaises(Exception, transformation.transform,
                      instantiated_start, pattern)
def test_equation_cancellation_with_negative(self):
    lhs = Parser(Tokenizer.tokenize('x + -4')).parse()
    rhs = Parser(Tokenizer.tokenize('y')).parse()
    equation = Equation(lhs, rhs)
    addition_cancellation = EquationCancellation(OperationType.PLUS(),
                                                 OperationType.MINUS())
    self.assertTrue(addition_cancellation.is_applicable_to(equation))
    result = addition_cancellation.apply(equation)
    verify(str(result), self.reporter)
def test_equation_cancellation(self):
    lhs = Parser(Tokenizer.tokenize('x * 4')).parse()
    rhs = Parser(Tokenizer.tokenize('y')).parse()
    equation = Equation(lhs, rhs)
    multiplication_cancellation = EquationCancellation(
        OperationType.TIMES(), OperationType.DIVIDE())
    self.assertTrue(multiplication_cancellation.is_applicable_to(equation))
    result = multiplication_cancellation.apply(equation)
    verify(str(result), self.reporter)
def test_equation_cancellation_is_applicable(self):
    lhs = Parser(Tokenizer.tokenize('x + 4')).parse()
    rhs = Parser(Tokenizer.tokenize('y')).parse()
    equation = Equation(lhs, rhs)
    addition_cancellation = EquationCancellation(OperationType.PLUS(),
                                                 OperationType.MINUS())
    self.assertTrue(addition_cancellation.is_applicable_to(equation))
    # After flipping, the addition sits on the rhs, so the cancellation
    # no longer applies.
    flipped = equation.flip()
    self.assertFalse(addition_cancellation.is_applicable_to(flipped))
def test_identity_transformation(self):
    start = Parser(Tokenizer.tokenize('x')).parse()
    end = start
    transformation = ExpressionSubstitution(start, end)
    instantiated_start = Parser(Tokenizer.tokenize('abc')).parse()
    pattern = SubstitutionPattern({'x': 'abc'})
    verify(
        '{} -> {}'.format(
            'abc', transformation.transform(instantiated_start, pattern)),
        self.reporter)
def test_all_substitutions_same_variable(self):
    expression = Parser(Tokenizer.tokenize('x + x + x')).parse()
    start = Parser(Tokenizer.tokenize('a + a')).parse()
    end = Parser(Tokenizer.tokenize('2 * a')).parse()
    transformation = ExpressionSubstitution(start, end)
    transformations = transformation.get_all_substitutions(expression)
    to_return = []
    for pattern, result in transformations:
        row = []
        for key in sorted(pattern.keys()):
            row.append('{} : {}'.format(key, pattern[key]))
        to_return.append('{' + ', '.join(row) + '} => ' + str(result))
    verify('\n'.join(to_return), self.reporter)
def test_transformation_with_expression(self):
    start = Parser(Tokenizer.tokenize('x + y')).parse()
    end = Parser(Tokenizer.tokenize('y + x')).parse()
    transformation = ExpressionSubstitution(start, end)
    instantiated_start = Parser(Tokenizer.tokenize('1+(2+3+4)')).parse()
    pattern = SubstitutionPattern({
        'x': '1',
        'y': Parser(Tokenizer.tokenize('2+3+4')).parse()
    })
    verify(
        '{} -> {}'.format(
            str(instantiated_start),
            transformation.transform(instantiated_start, pattern)),
        self.reporter)
def test_complex_single_solution_solve(self):
    # Solve x * 4 - 18 = 2 by looping a single solver step (next_step
    # points back to itself) until the lhs reduces to 'x'.
    lhs = Parser(Tokenizer.tokenize('x * 4 - 18')).parse()
    rhs = Parser(Tokenizer.tokenize('2')).parse()
    equation = Equation(lhs, rhs)
    cancellations = [
        EquationCancellation(OperationType.PLUS(), OperationType.MINUS()),
        EquationCancellation(OperationType.MINUS(), OperationType.PLUS()),
        EquationCancellation(OperationType.TIMES(), OperationType.DIVIDE()),
        EquationCancellation(OperationType.DIVIDE(), OperationType.TIMES())
    ]
    transformations = [c.as_transformation() for c in cancellations]
    step = SolverStep(transformations)
    step.next_step = step
    condition = lambda x: str(x.lhs) == 'x'
    result = step.execute_until(equation, condition)
    verify(str(result), self.reporter)
def test_order_of_operations(self):
    result = Tokenizer.tokenize('+2+-2*4')
    result = Parser(result).parse()
    verify(str(result), self.reporter)
def test_binary_parsing(self):
    result = Tokenizer.tokenize('+2+2')
    result = Parser(result).parse()
    verify(str(result), self.reporter)
def test_complex_parens(self):
    result = Tokenizer.tokenize('(+(2))+(2)')
    result = Parser(result).parse()
    verify(str(result), self.reporter)
def test_redundant_parens(self):
    result = Tokenizer.tokenize('((x))')
    result = Parser(result).parse()
    verify(str(result), self.reporter)
def test_simple_parsing(self):
    result = Tokenizer.tokenize('-2')
    result = Parser(result).parse()
    verify(str(result), self.reporter)
def test_tokenizing(self):
    result = Tokenizer.tokenize('(34+2*x)/14+17-x^2')
    verify(str(result), self.reporter)
import json

import emoji

# loading the cleaned tweet corpus
DATA_PATH = '../data/twitter-data-cleaned.txt'
with open(DATA_PATH, 'r', encoding="utf-8") as f:
    data = f.readlines()
data = [d.strip() for d in data if d.strip() != '']

# getting our chosen emojis
SELECTED_EMOJIS_PATH = '../data/best-emojis.json'
with open(SELECTED_EMOJIS_PATH, 'r') as f:
    EMOJIS = json.load(f)
EMOJI_CHARS = [e['char'] for e in EMOJIS]
ALL_EMOJIS = set(
    emoji.emojize(emoji_code) for emoji_code in emoji.UNICODE_EMOJI.values())

# preprocessing the data
tokenizer = Tokenizer(EMOJI_CHARS)

# take the 5 closest words to each emoji occurrence as its context
context = {e: [] for e in EMOJI_CHARS}
emojiToId = {e: i for i, e in enumerate(EMOJI_CHARS)}
for tweet in data:
    tokens = tokenizer.tokenize(tweet)
    for i, token in enumerate(tokens):
        if token.token_type == TokenType.EMOJIS:
            closest = tokenizer.findClosestNWords(5, tokens, i)
            if closest:
                context[token.raw].append(closest)

emojiBestWords = []
emojiWorstWords = []
for i in range(len(EMOJI_CHARS)):