Python get_database_configuration示例，hwrt.utils.get_database_configuration Python示例

示例#1

0

显示文件

文件： get_vocabulary.py 项目： templateK/write-math

def main():
    """
    Fetch the 'nesting symbol' formulas and hand them to ``store_symbols``.

    Connects to the database described by the ``mysql_online`` section of
    the database configuration, selects ``id`` and ``formula_in_latex`` of
    every ``wm_formula`` row whose ``formula_type`` is 'nesting symbol'
    (ordered by ``formula_in_latex``) and passes the fetched rows to
    ``store_symbols``.

    The function takes no parameters and returns nothing.
    """
    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor,
                                 charset='utf8')
    try:
        cursor = connection.cursor()
        sql = (
            "SELECT `id`, `formula_in_latex` FROM `wm_formula` "
            # "WHERE `formula_type` = 'single symbol' "
            "WHERE `formula_type` = 'nesting symbol' "
            "ORDER BY `formula_in_latex` ASC")
        cursor.execute(sql)
        symbols = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        connection.close()
    store_symbols(symbols)

示例#2

0

显示文件

文件： utils_test.py 项目： shalevy1/hwrt

def test_execution():
    """Smoke-test the ``utils`` helpers and pin a few formatted outputs.

    Most helpers are only called to make sure they run without raising;
    ``get_readable_time`` and ``sizeof_fmt`` are additionally compared
    against their expected string results.
    """
    hour_ms = 1000 * 60 * 60

    # Helpers that only have to execute without an exception.
    utils.get_project_root()
    utils.get_latest_model(".", "model")
    utils.get_latest_working_model(".")
    utils.get_latest_successful_run(".")

    # (milliseconds, expected human readable string)
    readable_time_cases = [
        (123, "123ms"),
        (1000 * 30, "30s 0ms"),
        (1000 * 60, "1 minutes 0s 0ms"),
        (hour_ms, "1h, 0 minutes 0s 0ms"),
        (2 * hour_ms, "2h, 0 minutes 0s 0ms"),
        (25 * hour_ms + 3, "25h, 0 minutes 0s 3ms"),
    ]
    for milliseconds, expected in readable_time_cases:
        assert utils.get_readable_time(milliseconds) == expected

    utils.print_status(3, 1, 123)
    utils.get_nntoolkit()
    utils.get_database_config_file()
    utils.get_database_configuration()

    # (size in bytes, expected formatted size)
    for size, expected in [(1, "1.0 bytes"), (1111, "1.1 KB")]:
        assert utils.sizeof_fmt(size) == expected

示例#3

0

显示文件

文件： db_dump.py 项目： templateK/write-math

def main():
    """Dump the structure and the constraints of the ``wm_`` tables.

    Reads the ``mysql_online`` section of the database configuration and
    passes it to ``dump_structure`` together with the two target file
    names; whatever ``dump_structure`` returns is logged afterwards.
    """
    mysql = utils.get_database_configuration()['mysql_online']
    logging.info("Start dumping structure and constraints...")

    # NOTE(review): the repository location is hard-coded to one machine.
    dir_s = "/home/moose/GitHub/write-math"
    structure_dir = "%s/database/structure/" % dir_s

    # The keyword ``filename_strucutre`` (sic) is kept exactly as the
    # original call spells it.
    tables = dump_structure(
        mysql,
        prefix='wm_',
        filename_strucutre=structure_dir + "write-math.sql",
        filename_constraints=structure_dir + "foreign-keys.sql")
    logging.info(tables)

示例#4

0

显示文件

文件： utils_test.py 项目： Duum/hwrt

def execution_test():
    """Smoke-test the ``utils`` helpers and pin a few formatted outputs.

    Most helpers are only invoked to verify that they run at all; the
    results of ``get_readable_time`` and ``sizeof_fmt`` are compared with
    ``nose.tools.assert_equal``.
    """
    check = nose.tools.assert_equal
    hour_ms = 1000 * 60 * 60

    # Helpers that only have to execute without an exception.
    utils.get_project_root()
    utils.get_latest_model(".", "model")
    utils.get_latest_working_model(".")
    utils.get_latest_successful_run(".")

    # (milliseconds, expected human readable string)
    readable_time_cases = (
        (123, "123ms"),
        (1000 * 30, "30s 0ms"),
        (1000 * 60, "1 minutes 0s 0ms"),
        (hour_ms, "1h, 0 minutes 0s 0ms"),
        (2 * hour_ms, "2h, 0 minutes 0s 0ms"),
        (25 * hour_ms + 3, "25h, 0 minutes 0s 3ms"),
    )
    for milliseconds, expected in readable_time_cases:
        check(utils.get_readable_time(milliseconds), expected)

    utils.print_status(3, 1, 123)
    utils.get_nntoolkit()
    utils.get_database_config_file()
    utils.get_database_configuration()

    # (size in bytes, expected formatted size)
    for size, expected in ((1, "1.0 bytes"), (1111, "1.1 KB")):
        check(utils.sizeof_fmt(size), expected)

示例#5

0

显示文件

def main(destination=os.path.join(utils.get_project_root(), "raw-datasets"),
         dataset='all',
         renderings=False):
    """
    Back up the raw handwriting recordings (and optionally SVG renderings).

    Parameters
    ----------
    destination : string
        Directory in which the pickle file is created.
    dataset : string
        Passed on to ``get_formulas`` to select the formulas; it also
        becomes part of the pickle file name (with '/' replaced by '-').
    renderings : bool
        If true, additionally download the newest SVG rendering of every
        formula and pack them into ``renderings.tar.bz2`` in the current
        working directory.
    """
    time_prefix = time.strftime("%Y-%m-%d-%H-%M")
    filename = ("%s-handwriting_datasets-%s-raw.pickle" %
                (time_prefix, dataset.replace('/', '-')))
    destination_path = os.path.join(destination, filename)
    logging.info("Data will be written to '%s'", destination_path)

    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = connection.cursor()

        formulas = get_formulas(cursor, dataset)
        logging.info('Received %i formulas.', len(formulas))
        handwriting_datasets = []
        formula_id2latex = {}

        # The formula id is bound by the driver (``%s`` placeholder)
        # instead of being formatted into the statement text.
        raw_data_sql = (
            "SELECT `wm_raw_draw_data`.`id`, `data`, `is_in_testset`, "
            "`wild_point_count`, `missing_line`, `user_id`, "
            "`display_name` "
            "FROM `wm_raw_draw_data` "
            "JOIN `wm_users` ON "
            "(`wm_users`.`id` = `wm_raw_draw_data`.`user_id`) "
            "WHERE `accepted_formula_id` = %s "
            # "AND `display_name` LIKE 'MfrDB::%%'"
        )

        # Go through each formula and download every raw_data instance
        for formula in formulas:
            formula_id2latex[formula['id']] = formula['formula_in_latex']
            cursor.execute(raw_data_sql, (formula['id'],))
            raw_datasets = cursor.fetchall()
            logging.info("%s (%i)",
                         formula['formula_in_latex'], len(raw_datasets))
            for raw_data in raw_datasets:
                try:
                    handwriting = HandwrittenData(
                        raw_data['data'],
                        formula['id'],
                        raw_data['id'],
                        formula['formula_in_latex'],
                        raw_data['wild_point_count'],
                        raw_data['missing_line'],
                        raw_data['user_id'],
                        user_name=raw_data['display_name'])
                    handwriting_datasets.append({
                        'handwriting': handwriting,
                        'id': raw_data['id'],
                        'formula_id': formula['id'],
                        'formula_in_latex': formula['formula_in_latex'],
                        'is_in_testset': raw_data['is_in_testset'],
                    })
                except Exception as e:
                    # Best effort: a recording that cannot be turned into
                    # a HandwrittenData object is logged and skipped.
                    logging.info("Raw data id: %s", raw_data['id'])
                    logging.info(e)

        # Close the pickle file deterministically instead of relying on
        # garbage collection of an anonymous file object.
        with open(destination_path, "wb") as pickle_file:
            pickle.dump(
                {
                    'handwriting_datasets': handwriting_datasets,
                    'formula_id2latex': formula_id2latex
                }, pickle_file, 2)

        if renderings:
            logging.info("Start downloading SVG renderings...")
            # NOTE(review): this temporary folder is never removed.
            svgfolder = tempfile.mkdtemp()
            # Self-join that keeps, per formula, the rendering for which
            # no rendering with a later creation_time exists.
            sql = """SELECT t1.formula_id, t1.svg from wm_renderings t1
                     LEFT JOIN wm_renderings t2
                     ON t1.formula_id = t2.formula_id
                     AND t1.creation_time < t2.creation_time
                     WHERE t2.id is null"""
            cursor.execute(sql)
            formulas = cursor.fetchall()
            logging.info("Create svg...")
            for formula in formulas:
                filename = os.path.join(svgfolder,
                                        "%s.svg" % str(formula['formula_id']))
                with open(filename, 'wb') as temp_file:
                    temp_file.write(formula['svg'])

            # Log the path of the archive that is really written.
            tar_path = os.path.abspath("renderings.tar.bz2")
            logging.info("Tar at %s", tar_path)

            with tarfile.open(tar_path, "w:bz2") as tar:
                for fn in os.listdir(svgfolder):
                    filename = os.path.join(svgfolder, fn)
                    if os.path.isfile(filename):
                        print(filename)
                        tar.add(filename,
                                arcname=os.path.basename(filename))
    finally:
        # Release the database connection even if a step above fails.
        connection.close()

示例#6

0

显示文件

文件： get_probabilites_of_stroke_counts.py 项目： MartinThoma/write-math

def main(dataset='all'):
    """
    Count, per formula, how often each stroke count occurs and store it.

    For every formula returned by ``get_formulas`` the accepted recordings
    without wild points and without correction are downloaded. Their
    stroke counts are logged (range, median, mean, std, modes), recordings
    whose stroke count is not one of the modes are reported through
    ``print_exceptions``, and the stroke count histogram of all formulas
    is finally passed to ``write_prob``.

    Parameters
    ----------
    dataset : string
        Either 'all' or a path to a yaml symbol file.
    """
    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    prob = {}
    try:
        cursor = connection.cursor()

        # TODO: no formulas, only single-symbol ones.
        formulas = get_formulas(cursor, dataset)

        # The formula id is bound by the driver (``%s`` placeholder)
        # instead of being formatted into the statement text.
        raw_data_sql = (
            "SELECT `wm_raw_draw_data`.`id`, `data`, `is_in_testset`, "
            "`wild_point_count`, `missing_line`, `user_id`, "
            "`display_name` "
            "FROM `wm_raw_draw_data` "
            "JOIN `wm_users` ON "
            "(`wm_users`.`id` = `wm_raw_draw_data`.`user_id`) "
            "WHERE `accepted_formula_id` = %s "
            "AND wild_point_count=0 "
            "AND has_correction=0 "
            # "AND `display_name` LIKE 'MfrDB::%%'"
        )

        # Go through each formula and download every raw_data instance
        for formula in formulas:
            stroke_counts = []
            recordings = []
            cursor.execute(raw_data_sql, (formula['id'],))
            raw_datasets = cursor.fetchall()
            logging.info("%s (%i)",
                         formula['formula_in_latex'], len(raw_datasets))
            for raw_data in raw_datasets:
                try:
                    handwriting = HandwrittenData(
                        raw_data['data'],
                        formula['id'],
                        raw_data['id'],
                        formula['formula_in_latex'],
                        raw_data['wild_point_count'],
                        raw_data['missing_line'],
                        raw_data['user_id'])
                    stroke_counts.append(len(handwriting.get_pointlist()))
                    recordings.append(handwriting)
                except Exception as e:
                    # Best effort: unusable recordings are logged and
                    # skipped.
                    logging.info("Raw data id: %s", raw_data['id'])
                    logging.info(e)

            if not stroke_counts:
                logging.debug("No recordings for symbol "
                              "'http://www.martin-thoma.de/"
                              "write-math/symbol/?id=%s'.",
                              formula['id'])
                continue

            logging.info("\t[%i - %i]",
                         min(stroke_counts), max(stroke_counts))
            median = numpy.median(stroke_counts)
            logging.info("\tMedian: %0.2f\tMean: %0.2f\tstd: %0.2f",
                         median,
                         numpy.mean(stroke_counts),
                         numpy.std(stroke_counts))

            # Make prob: stroke count -> number of recordings, inserted
            # from the most to the least frequent stroke count.
            by_frequency = sorted(Counter(stroke_counts).items(),
                                  key=lambda n: n[1],
                                  reverse=True)
            prob[formula['formula_in_latex']] = dict(by_frequency)

            # Outliers
            modes = get_modes(stroke_counts)
            logging.info("\tModes: %s", modes)
            exceptions = []
            for rec in recordings:
                # Compute the stroke count once per recording.
                n_strokes = len(rec.get_pointlist())
                if n_strokes in modes:
                    continue
                url = (("http://www.martin-thoma.de/"
                        "write-math/view/?raw_data_id=%i - "
                        "%i strokes") % (rec.raw_data_id, n_strokes))
                dist = get_dist(n_strokes, modes)
                exceptions.append((url, n_strokes, dist))
            print_exceptions(exceptions, max_print=10)
    finally:
        # Release the database connection even if a step above fails.
        connection.close()
    write_prob(prob, "prob_stroke_count_by_symbol.yml")

示例#7

0

显示文件

文件： create_testset_online_once.py 项目： templateK/write-math

                for el in add_new:
                    print("\thttp://write-math.com/view/?raw_data_id=%i" % el)
            for rid in add_new:
                sql = ("UPDATE `wm_raw_draw_data` SET `is_in_testset`=1 "
                       "WHERE `id` = %i LIMIT 1") % rid
                cursor.execute(sql)
            connection.commit()


def get_parser():
    """Build and return the command line parser of this script.

    The parser knows a single, mandatory option ``-s`` / ``--symbol``
    whose value is handed to ``utils.is_valid_file`` and stored as
    ``symbol_filename``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    symbol_option = {
        'dest': "symbol_filename",
        # The lambda closes over ``parser``; it is only called while
        # arguments are being parsed.
        'type': lambda x: utils.is_valid_file(parser, x),
        'required': True,
        'help': "symbol yml file",
        'metavar': "FILE",
    }
    parser.add_argument("-s", "--symbol", **symbol_option)
    return parser


if __name__ == '__main__':
    # Parse the command line, then run ``main`` once for every database
    # section that is present in the configuration: the online database
    # first, the local one second.
    args = get_parser().parse_args()
    cfg = utils.get_database_configuration()
    for section in ('mysql_online', 'mysql_local'):
        if section in cfg:
            main(cfg[section], args.symbol_filename)

示例#8

0

显示文件

文件： evaluate_preprocessing_algorithms.py 项目： MartinThoma/write-math

def main(cfg, raw_data_start_id):
    """
    Show recordings that ``DotReduction`` changes noticeably.

    For every formula with at least 100 accepted recordings, each
    recording is preprocessed with ``ScaleAndShift``; a copy is
    additionally run through ``DotReduction(0.01)``. If both differ, the
    reduction is printed, and if more than two points were removed both
    versions are shown.

    Parameters
    ----------
    cfg : object
        Ignored: the value is replaced by the result of
        ``utils.get_database_configuration()`` right away. The parameter
        is kept so that existing callers keep working.
    raw_data_start_id : value comparable to `wm_formula`.`id`
        Only formulas whose ``id`` is greater than this value are
        examined (despite the name, it is compared with the formula id).
    """
    # NOTE(review): this overwrites the ``cfg`` argument - confirm whether
    # the caller's configuration was meant to be used instead.
    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = connection.cursor()

        # Get formulas
        print("Get formulas")
        sql = ("SELECT `id`, `formula_in_latex` FROM `wm_formula` "
               "WHERE `id` > %s")
        cursor.execute(sql, (raw_data_start_id, ))
        formulaid2latex = {}
        for el in cursor.fetchall():
            formulaid2latex[el['id']] = el['formula_in_latex']

        preprocessing_queue = [preprocessing.ScaleAndShift(),
                               # preprocessing.Douglas_peucker(EPSILON=0.2),
                               # preprocessing.Space_evenly(number=100,
                               #                            kind='cubic')
                               ]

        checked_formulas = 0
        checked_raw_data_instances = 0

        # Parameterized like the formula query above; the driver binds
        # the formula id to the ``%s`` placeholder.
        raw_data_sql = (
            "SELECT `id`, `data`, `accepted_formula_id`, "
            "`wild_point_count`, `missing_line`, `has_hook`, "
            "`has_too_long_line`, `is_image`, `administrator_edit`, "
            "`other_problem`, `has_interrupted_line` "
            "FROM  `wm_raw_draw_data` "
            "WHERE `accepted_formula_id` = %s "
            "ORDER BY `administrator_edit` DESC, "
            "`creation_date` ASC;")

        for formula_id, latex in formulaid2latex.items():
            if formula_id == 1:
                # This formula id is for trash. No need to look at it.
                continue
            # Get data
            print("Get data for formula_id %i (%s)" % (formula_id, latex))
            cursor.execute(raw_data_sql, (formula_id, ))
            raw_datasets = cursor.fetchall()
            print("Raw datasets: %i" % len(raw_datasets))
            checked_raw_data_instances += len(raw_datasets)
            checked_formulas += 1
            if len(raw_datasets) < 100:
                # Formulas with few recordings are counted but not
                # inspected.
                continue

            for data in raw_datasets:
                if data['data'] == "[]":
                    continue
                B = HandwrittenDataM(data['data'],
                                     data['accepted_formula_id'],
                                     data['wild_point_count'],
                                     data['missing_line'],
                                     data['has_hook'],
                                     data['has_too_long_line'],
                                     data['is_image'],
                                     data['other_problem'],
                                     data['has_interrupted_line'],
                                     data['id'],
                                     latex)
                B.preprocessing(preprocessing_queue)
                # Bs is B with the dot reduction applied on top.
                Bs = deepcopy(B)
                Bs.preprocessing([preprocessing.DotReduction(0.01)])
                if B != Bs:
                    before_pointcount = sum(len(line)
                                            for line in B.get_pointlist())
                    after_pointcount = sum(len(line)
                                           for line in Bs.get_pointlist())
                    print("Reduced %i lines to %i lines." %
                          (len(B.get_pointlist()), len(Bs.get_pointlist())))
                    print("Reduced %i points to %i points." %
                          (before_pointcount, after_pointcount))
                    if before_pointcount - after_pointcount > 2:
                        B.show()
                        Bs.show()

            print("[Status] Checked formulas: %i of %i" %
                  (checked_formulas, len(formulaid2latex)))
            print("[Status] Checked raw_data_instances: %i" %
                  checked_raw_data_instances)
    finally:
        # Release the database connection even if a step above fails.
        connection.close()
    print("done")

示例#9

0

显示文件

文件： get_probabilites_of_stroke_counts.py 项目： templateK/write-math

def main(dataset='all'):
    """
    Count, per formula, how often each stroke count occurs and store it.

    For every formula returned by ``get_formulas`` the accepted recordings
    without wild points and without correction are downloaded. Their
    stroke counts are logged (range, median, mean, std, modes), recordings
    whose stroke count is not one of the modes are reported through
    ``print_exceptions``, and the stroke count histogram of all formulas
    is finally passed to ``write_prob``.

    Parameters
    ----------
    dataset : string
        Either 'all' or a path to a yaml symbol file.
    """
    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    prob = {}
    try:
        cursor = connection.cursor()

        # TODO: no formulas, only single-symbol ones.
        formulas = get_formulas(cursor, dataset)

        # The formula id is bound by the driver (``%s`` placeholder)
        # instead of being formatted into the statement text.
        raw_data_sql = (
            "SELECT `wm_raw_draw_data`.`id`, `data`, `is_in_testset`, "
            "`wild_point_count`, `missing_line`, `user_id`, "
            "`display_name` "
            "FROM `wm_raw_draw_data` "
            "JOIN `wm_users` ON "
            "(`wm_users`.`id` = `wm_raw_draw_data`.`user_id`) "
            "WHERE `accepted_formula_id` = %s "
            "AND wild_point_count=0 "
            "AND has_correction=0 "
            # "AND `display_name` LIKE 'MfrDB::%%'"
        )

        # Go through each formula and download every raw_data instance
        for formula in formulas:
            stroke_counts = []
            recordings = []
            cursor.execute(raw_data_sql, (formula['id'],))
            raw_datasets = cursor.fetchall()
            logging.info("%s (%i)",
                         formula['formula_in_latex'], len(raw_datasets))
            for raw_data in raw_datasets:
                try:
                    handwriting = HandwrittenData(
                        raw_data['data'], formula['id'],
                        raw_data['id'],
                        formula['formula_in_latex'],
                        raw_data['wild_point_count'],
                        raw_data['missing_line'],
                        raw_data['user_id'])
                    stroke_counts.append(len(handwriting.get_pointlist()))
                    recordings.append(handwriting)
                except Exception as e:
                    # Best effort: unusable recordings are logged and
                    # skipped.
                    logging.info("Raw data id: %s", raw_data['id'])
                    logging.info(e)

            if not stroke_counts:
                logging.debug(
                    "No recordings for symbol "
                    "'http://www.martin-thoma.de/"
                    "write-math/symbol/?id=%s'.", formula['id'])
                continue

            logging.info("\t[%i - %i]",
                         min(stroke_counts), max(stroke_counts))
            median = numpy.median(stroke_counts)
            logging.info("\tMedian: %0.2f\tMean: %0.2f\tstd: %0.2f", median,
                         numpy.mean(stroke_counts), numpy.std(stroke_counts))

            # Make prob: stroke count -> number of recordings, inserted
            # from the most to the least frequent stroke count.
            by_frequency = sorted(Counter(stroke_counts).items(),
                                  key=lambda n: n[1],
                                  reverse=True)
            prob[formula['formula_in_latex']] = dict(by_frequency)

            # Outliers
            modes = get_modes(stroke_counts)
            logging.info("\tModes: %s", modes)
            exceptions = []
            for rec in recordings:
                # Compute the stroke count once per recording.
                n_strokes = len(rec.get_pointlist())
                if n_strokes in modes:
                    continue
                url = (("http://www.martin-thoma.de/"
                        "write-math/view/?raw_data_id=%i - "
                        "%i strokes") % (rec.raw_data_id, n_strokes))
                dist = get_dist(n_strokes, modes)
                exceptions.append((url, n_strokes, dist))
            print_exceptions(exceptions, max_print=10)
    finally:
        # Release the database connection even if a step above fails.
        connection.close()
    write_prob(prob, "prob_stroke_count_by_symbol.yml")

示例#10

0

显示文件

def main(cfg, raw_data_start_id):
    """
    Show recordings that ``DotReduction`` changes noticeably.

    For every formula with at least 100 accepted recordings, each
    recording is preprocessed with ``ScaleAndShift``; a copy is
    additionally run through ``DotReduction(0.01)``. If both differ, the
    reduction is printed, and if more than two points were removed both
    versions are shown.

    Parameters
    ----------
    cfg : object
        Ignored: the value is replaced by the result of
        ``utils.get_database_configuration()`` right away. The parameter
        is kept so that existing callers keep working.
    raw_data_start_id : value comparable to `wm_formula`.`id`
        Only formulas whose ``id`` is greater than this value are
        examined (despite the name, it is compared with the formula id).
    """
    # NOTE(review): this overwrites the ``cfg`` argument - confirm whether
    # the caller's configuration was meant to be used instead.
    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = connection.cursor()

        # Get formulas
        print("Get formulas")
        sql = ("SELECT `id`, `formula_in_latex` FROM `wm_formula` "
               "WHERE `id` > %s")
        cursor.execute(sql, (raw_data_start_id, ))
        formulaid2latex = {}
        for el in cursor.fetchall():
            formulaid2latex[el['id']] = el['formula_in_latex']

        preprocessing_queue = [
            preprocessing.ScaleAndShift(),
            # preprocessing.Douglas_peucker(EPSILON=0.2),
            # preprocessing.Space_evenly(number=100,
            #                            kind='cubic')
        ]

        checked_formulas = 0
        checked_raw_data_instances = 0

        # Parameterized like the formula query above; the driver binds
        # the formula id to the ``%s`` placeholder.
        raw_data_sql = (
            "SELECT `id`, `data`, `accepted_formula_id`, "
            "`wild_point_count`, `missing_line`, `has_hook`, "
            "`has_too_long_line`, `is_image`, `administrator_edit`, "
            "`other_problem`, `has_interrupted_line` "
            "FROM  `wm_raw_draw_data` "
            "WHERE `accepted_formula_id` = %s "
            "ORDER BY `administrator_edit` DESC, "
            "`creation_date` ASC;")

        for formula_id, latex in formulaid2latex.items():
            if formula_id == 1:
                # This formula id is for trash. No need to look at it.
                continue
            # Get data
            print("Get data for formula_id %i (%s)" % (formula_id, latex))
            cursor.execute(raw_data_sql, (formula_id, ))
            raw_datasets = cursor.fetchall()
            print("Raw datasets: %i" % len(raw_datasets))
            checked_raw_data_instances += len(raw_datasets)
            checked_formulas += 1
            if len(raw_datasets) < 100:
                # Formulas with few recordings are counted but not
                # inspected.
                continue

            for data in raw_datasets:
                if data['data'] == "[]":
                    continue
                B = HandwrittenDataM(data['data'],
                                     data['accepted_formula_id'],
                                     data['wild_point_count'],
                                     data['missing_line'], data['has_hook'],
                                     data['has_too_long_line'],
                                     data['is_image'],
                                     data['other_problem'],
                                     data['has_interrupted_line'],
                                     data['id'],
                                     latex)
                B.preprocessing(preprocessing_queue)
                # Bs is B with the dot reduction applied on top.
                Bs = deepcopy(B)
                Bs.preprocessing([preprocessing.DotReduction(0.01)])
                if B != Bs:
                    before_pointcount = sum(
                        len(line) for line in B.get_pointlist())
                    after_pointcount = sum(
                        len(line) for line in Bs.get_pointlist())
                    print("Reduced %i lines to %i lines." %
                          (len(B.get_pointlist()), len(Bs.get_pointlist())))
                    print("Reduced %i points to %i points." %
                          (before_pointcount, after_pointcount))
                    if before_pointcount - after_pointcount > 2:
                        B.show()
                        Bs.show()

            print("[Status] Checked formulas: %i of %i" %
                  (checked_formulas, len(formulaid2latex)))
            print("[Status] Checked raw_data_instances: %i" %
                  checked_raw_data_instances)
    finally:
        # Release the database connection even if a step above fails.
        connection.close()
    print("done")

示例#11

0

显示文件

文件： find_wrong_symbol_count.py 项目： MartinThoma/write-math

def main():
    """Run ``find_wrong_count`` with the ``mysql_online`` settings.

    The settings are the ``mysql_online`` entry of the configuration
    returned by ``utils.get_database_configuration()``.
    """
    online_settings = utils.get_database_configuration()['mysql_online']
    find_wrong_count(online_settings)

示例#12

0

显示文件

文件： find_wrong_symbol_count.py 项目： templateK/write-math

def main():
    """Look up the online database settings and check the symbol counts.

    Passes the ``mysql_online`` entry of the database configuration to
    ``find_wrong_count``.
    """
    database_cfg = utils.get_database_configuration()
    find_wrong_count(database_cfg['mysql_online'])

示例#13

0

显示文件

文件： backup.py 项目： MartinThoma/write-math

def main(destination=os.path.join(utils.get_project_root(),
                                  "raw-datasets"),
         dataset='all',
         renderings=False):
    """
    Back up the raw handwriting recordings (and optionally SVG renderings).

    Parameters
    ----------
    destination : string
        Directory in which the pickle file is created.
    dataset : string
        Passed on to ``get_formulas`` to select the formulas; it also
        becomes part of the pickle file name (with '/' replaced by '-').
    renderings : bool
        If true, additionally download the newest SVG rendering of every
        formula and pack them into ``renderings.tar.bz2`` in the current
        working directory.
    """
    time_prefix = time.strftime("%Y-%m-%d-%H-%M")
    filename = ("%s-handwriting_datasets-%s-raw.pickle" %
                (time_prefix, dataset.replace('/', '-')))
    destination_path = os.path.join(destination, filename)
    logging.info("Data will be written to '%s'", destination_path)

    cfg = utils.get_database_configuration()
    mysql = cfg['mysql_online']
    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        cursor = connection.cursor()

        formulas = get_formulas(cursor, dataset)
        logging.info('Received %i formulas.', len(formulas))
        handwriting_datasets = []
        formula_id2latex = {}

        # The formula id is bound by the driver (``%s`` placeholder)
        # instead of being formatted into the statement text.
        raw_data_sql = (
            "SELECT `wm_raw_draw_data`.`id`, `data`, `is_in_testset`, "
            "`wild_point_count`, `missing_line`, `user_id`, "
            "`display_name` "
            "FROM `wm_raw_draw_data` "
            "JOIN `wm_users` ON "
            "(`wm_users`.`id` = `wm_raw_draw_data`.`user_id`) "
            "WHERE `accepted_formula_id` = %s "
            # "AND `display_name` LIKE 'MfrDB::%%'"
        )

        # Go through each formula and download every raw_data instance
        for formula in formulas:
            formula_id2latex[formula['id']] = formula['formula_in_latex']
            cursor.execute(raw_data_sql, (formula['id'],))
            raw_datasets = cursor.fetchall()
            logging.info("%s (%i)",
                         formula['formula_in_latex'], len(raw_datasets))
            for raw_data in raw_datasets:
                try:
                    handwriting = HandwrittenData(
                        raw_data['data'],
                        formula['id'],
                        raw_data['id'],
                        formula['formula_in_latex'],
                        raw_data['wild_point_count'],
                        raw_data['missing_line'],
                        raw_data['user_id'],
                        user_name=raw_data['display_name'])
                    handwriting_datasets.append({
                        'handwriting': handwriting,
                        'id': raw_data['id'],
                        'formula_id': formula['id'],
                        'formula_in_latex': formula['formula_in_latex'],
                        'is_in_testset': raw_data['is_in_testset'],
                    })
                except Exception as e:
                    # Best effort: a recording that cannot be turned into
                    # a HandwrittenData object is logged and skipped.
                    logging.info("Raw data id: %s", raw_data['id'])
                    logging.info(e)

        # Close the pickle file deterministically instead of relying on
        # garbage collection of an anonymous file object.
        with open(destination_path, "wb") as pickle_file:
            pickle.dump({'handwriting_datasets': handwriting_datasets,
                         'formula_id2latex': formula_id2latex},
                        pickle_file,
                        2)

        if renderings:
            logging.info("Start downloading SVG renderings...")
            # NOTE(review): this temporary folder is never removed.
            svgfolder = tempfile.mkdtemp()
            # Self-join that keeps, per formula, the rendering for which
            # no rendering with a later creation_time exists.
            sql = """SELECT t1.formula_id, t1.svg from wm_renderings t1
                     LEFT JOIN wm_renderings t2
                     ON t1.formula_id = t2.formula_id
                     AND t1.creation_time < t2.creation_time
                     WHERE t2.id is null"""
            cursor.execute(sql)
            formulas = cursor.fetchall()
            logging.info("Create svg...")
            for formula in formulas:
                filename = os.path.join(svgfolder,
                                        "%s.svg" % str(formula['formula_id']))
                with open(filename, 'wb') as temp_file:
                    temp_file.write(formula['svg'])

            # Log the path of the archive that is really written.
            tar_path = os.path.abspath("renderings.tar.bz2")
            logging.info("Tar at %s", tar_path)

            with tarfile.open(tar_path, "w:bz2") as tar:
                for fn in os.listdir(svgfolder):
                    filename = os.path.join(svgfolder, fn)
                    if os.path.isfile(filename):
                        print(filename)
                        tar.add(filename,
                                arcname=os.path.basename(filename))
    finally:
        # Release the database connection even if a step above fails.
        connection.close()