Пример #1
0
    def test_smoke(self, tmpdir, filename):
        p = tmpdir.join("tmp.db")
        con = SimpleSQLite(str(p), "w")

        test_data_file_path = os.path.join(
            os.path.dirname(__file__), "data", filename)
        loader = ptr.TableFileLoader(test_data_file_path)

        success_count = 0

        for tabledata in loader.load():
            if tabledata.is_empty():
                continue

            print(ptw.dump_tabledata(tabledata))

            try:
                con.create_table_from_tabledata(
                    ptr.SQLiteTableDataSanitizer(tabledata).sanitize())
                success_count += 1
            except ValueError as e:
                print(e)

        con.commit()

        assert success_count > 0
Пример #2
0
def gs(ctx, credentials, title, output_path):
    """
    Convert a spreadsheet in Google Sheets to a SQLite database file.

    CREDENTIALS: OAuth2 Google credentials file.
    TITLE: Title of the Google Sheets to convert.
    """

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} gs".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_manager.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for tabledata in loader.load():
            logger.debug(u"loaded tabledata: {}".format(
                six.text_type(tabledata)))

            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                table_creator.create(sqlite_tabledata,
                                     ctx.obj.get(Context.INDEX_LIST))
            except (ptr.ValidationError, ptr.InvalidDataError):
                result_counter.inc_fail()

            logger.info(
                get_success_message(
                    verbosity_level, "google sheets",
                    schema_extractor.get_table_schema_text(
                        tabledata.table_name).strip()))
    except ptr.OpenError as e:
        logger.error(e)
        result_counter.inc_fail()
    except AttributeError:
        logger.error(u"invalid credentials data: path={}".format(credentials))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.InvalidDataError) as e:
        logger.error(u"invalid credentials data: path={}, message={}".format(
            credentials, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Пример #3
0
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """

    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} url".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])

    if typepy.is_null_string(proxy):
        proxy = app_config_manager.load().get(ConfigKey.PROXY_SERVER)

    proxies = {
        "http": proxy,
        "https": proxy,
    }

    try:
        loader = create_url_loader(logger, url, format_name, encoding, proxies)
    except ptr.LoaderNotFoundError as e:
        try:
            loader = create_url_loader(logger, url, "html", encoding, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(e)
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

    try:
        for tabledata in loader.load():
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                TableCreator(dst_con=con, tabledata=sqlite_tabledata).create()
                result_counter.inc_success()
            except (ValueError) as e:
                logger.debug(u"url={}, message={}".format(url, str(e)))
                result_counter.inc_fail()
                continue

            logger.info(
                get_success_message(
                    verbosity_level, url,
                    schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip()))
    except ptr.InvalidDataError as e:
        logger.error(u"invalid data: url={}, message={}".format(url, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Пример #4
0
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/SQLite/TSV
    file(s) to a SQLite database file.
    """

    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} file".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)

        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(tabledata)))

                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name).strip()))
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Пример #5
0
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Fetch data from a URL and convert data to a SQLite database file.
    """

    if dataproperty.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter url")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    proxies = {}
    if dataproperty.is_not_empty_string(proxy):
        proxies = {
            "http": proxy,
            "https": proxy,
        }

    try:
        loader = ptr.TableUrlLoader(url,
                                    format_name,
                                    encoding=encoding,
                                    proxies=proxies)
    except ptr.LoaderNotFoundError as e:
        try:
            loader = ptr.TableUrlLoader(url,
                                        "html",
                                        encoding=encoding,
                                        proxies=proxies)
        except (ptr.LoaderNotFoundError, ptr.HTTPError):
            logger.error(e)
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)
    except ptr.HTTPError as e:
        logger.error(e)
        sys.exit(ExitCode.FAILED_HTTP)

    try:
        for tabledata in loader.load():
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                con.create_table_from_tabledata(sqlite_tabledata)
                result_counter.inc_success()
            except (ValueError) as e:
                logger.debug(u"url={}, message={}".format(url, str(e)))
                result_counter.inc_fail()
                continue

            log_message = get_success_log_format(verbosity_level).format(
                url,
                extractor.get_table_schema_text(
                    sqlite_tabledata.table_name).strip())
            logger.info(log_message)
    except ptr.InvalidDataError as e:
        logger.error(u"invalid data: url={}, message={}".format(url, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Пример #6
0
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/TSV file(s)
    to a SQLite database file.
    """

    if dataproperty.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter file")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    for file_path in files:
        file_path = path.Path(file_path)
        if not file_path.isfile():
            logger.debug(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    con.create_table_from_tabledata(sqlite_tabledata)
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                log_message = get_success_log_format(verbosity_level).format(
                    file_path,
                    extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip())
                logger.info(log_message)
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())