def generic_pipeline(seq):
    """Run the preprocessing stages shared by every dump file.

    The stages are applied in this order:

    * keep only the strings matching ``^INSERT``
      (``wpi_utils.filter_strings``)
    * convert the strings to unicode
      (``wpi_utils.convert_multirow_to_unicode``)
    * strip each string
    * replace MySQL quotes with psql ones (``psql_quotation``)

    :param seq: Sequence of strings
    :type seq: Iterable
    :returns: whatever ``psql_quotation`` returns for the processed strings
    """
    insert_statements = wpi_utils.filter_strings(r'^INSERT', seq)
    decoded = wpi_utils.convert_multirow_to_unicode(insert_statements)
    # Generator expression keeps the stripping lazy, one statement at a time.
    stripped = (statement.strip() for statement in decoded)
    return psql_quotation(stripped)
def test_convert_multirow_to_unicode():
    """Check the first item yielded by ``convert_multirow_to_unicode``.

    Each input is a one-element list holding a byte-string INSERT statement
    with two value tuples, converted with the "utf8" encoding argument. The
    first tuple carries the two bytes C3 A5, the second a lone E5 byte. The
    first yielded item is expected to be an INSERT statement containing only
    the first tuple, with those bytes decoded to a single character.

    The same check is run for a backtick-quoted and a double-quoted table
    name.
    """
    # NOTE(review): presumably the second tuple is left out because a lone
    # E5 byte is not valid UTF-8 -- confirm against wpi_utils.

    # The built-in next() is used instead of the iterator's .next() method,
    # which only exists on Python 2 iterators.
    mul_row = [b"INSERT INTO `witch` VALUES ('\xc3\xa5',23),('\xe5',42);"]
    eq_(next(wpi_utils.convert_multirow_to_unicode(mul_row, "utf8")),
        "INSERT INTO `witch` VALUES ('å',23);")

    mul_row = [b"""INSERT INTO "witch" VALUES ('\xc3\xa5',23),('\xe5',42);"""]
    eq_(next(wpi_utils.convert_multirow_to_unicode(mul_row, "utf8")),
        """INSERT INTO "witch" VALUES ('å',23);""")