def get_vertica_results(credentials, query):
    """
    Run a single query in Vertica and return the results.

    Args:
        credentials: URL handed to ``ExternalURL``; the resulting target is read as JSON, and its
            ``username``, ``password``, ``host`` and ``port`` keys are used to open the connection.
        query: SQL text passed to ``cursor.execute``.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the query.

    Raises:
        ImportError: if ``vertica_client_available`` is falsy.
    """
    # Check for the client library before anything touches ``vertica_python``; checking after the
    # connect call (as was done previously) can never guard that call.
    if not vertica_client_available:
        raise ImportError('Vertica client library not available')

    credentials_target = ExternalURL(url=credentials).output()
    with credentials_target.open('r') as credentials_file:
        cred = json.load(credentials_file)

    # TODO: externalize autocommit and read timeout
    connection = vertica_python.connect(user=cred.get('username'),
                                        password=cred.get('password'),
                                        host=cred.get('host'),
                                        port=cred.get('port'),
                                        database='warehouse',
                                        autocommit=False,
                                        read_timeout=None)

    try:
        cursor = connection.cursor()
        cursor.execute(query)
        results = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()

    return results
Пример #2
0
    def get_downstream_task(self):
        """
        Yield the task at the end of the configured script chain, building the chain on first use.

        When ``self.downstream_task`` is unset, the YAML document at ``self.script_configuration`` is loaded and
        one ``RunVerticaSqlScriptTask`` is created per valid entry under its ``scripts`` key, in file order.  Each
        task has the task created before it registered through ``add_dependency``, and the last task created is
        cached in ``self.downstream_task``.  Nothing is yielded when no task could be built.
        """
        # If no downstream task has been set, load our configuration and generate our tasks and dependency chain.
        if self.downstream_task is None:
            script_conf_target = ExternalURL(
                url=self.script_configuration).output()
            with script_conf_target.open('r') as script_conf_file:
                config = yaml.safe_load(script_conf_file)

            # An empty document parses to None and a non-mapping document has no 'scripts' key; skip both.
            if isinstance(config, dict):
                previous_task = None

                # A bare "scripts:" key parses to None, so fall back to an empty list rather than iterating None.
                scripts = config.get('scripts') or []

                # Walk the scripts from the top of the file downwards.  Every new task receives the task built
                # just before it as a dependency, so the final task in the chain depends, transitively, on all
                # the earlier ones.
                for script in scripts:
                    if not self.validate_script_entry(script):
                        log.warning("encountered invalid script entry!")
                        continue

                    new_task = RunVerticaSqlScriptTask(
                        credentials=self.credentials,
                        schema=self.schema,
                        marker_schema=self.marker_schema,
                        date=self.date,
                        read_timeout=self.read_timeout,
                        source_script=path.join(self.script_root,
                                                script['location']),
                        script_name=script.get('name'))

                    # If we previously configured a task, set it as a dependency of this one, so it runs prior to.
                    if previous_task is not None:
                        new_task.add_dependency(previous_task)

                    # Mark this as the previously-created task.
                    previous_task = new_task

                self.downstream_task = previous_task

        # If a downstream task has been set, yield it, triggering Luigi to schedule our scripts.
        if self.downstream_task is not None:
            yield self.downstream_task
Пример #3
0
def get_vertica_results(credentials, query):
    """
    Run a single query in Vertica and return the results.

    Args:
        credentials: URL given to ``ExternalURL``; its output target is parsed as JSON and the ``username``,
            ``password``, ``host`` and ``port`` keys supply the connection settings.
        query: SQL statement handed to ``cursor.execute``.

    Returns:
        The value of ``cursor.fetchall()`` for the executed query.

    Raises:
        ImportError: if ``vertica_client_available`` is falsy.
    """
    # Fail fast when the client library is missing.  This guard used to sit after the connect call, where it
    # could not prevent ``vertica_python`` from being used.
    if not vertica_client_available:
        raise ImportError('Vertica client library not available')

    credentials_target = ExternalURL(url=credentials).output()
    with credentials_target.open('r') as credentials_file:
        cred = json.load(credentials_file)

    # TODO: externalize autocommit and read timeout
    connection = vertica_python.connect(user=cred.get('username'), password=cred.get('password'), host=cred.get('host'),
                                        port=cred.get('port'), database='warehouse', autocommit=False,
                                        read_timeout=None)

    try:
        cursor = connection.cursor()
        cursor.execute(query)
        results = cursor.fetchall()
    finally:
        # The connection is closed whether or not the query succeeded.
        connection.close()

    return results
    def get_downstream_task(self):
        """
        Yield the last task of the configured script chain, creating the chain the first time it is needed.

        If ``self.downstream_task`` is ``None``, the YAML file at ``self.script_configuration`` is parsed and a
        ``RunVerticaSqlScriptTask`` is built for each valid entry of its ``scripts`` list, in file order.  Every
        task is given the previously built task via ``add_dependency``; the last one built is stored in
        ``self.downstream_task``.  If no task ends up being built, the generator yields nothing.
        """
        # If no downstream task has been set, load our configuration and generate our tasks and dependency chain.
        if self.downstream_task is None:
            script_conf_target = ExternalURL(url=self.script_configuration).output()
            with script_conf_target.open('r') as script_conf_file:
                config = yaml.safe_load(script_conf_file)

            # safe_load gives None for an empty document; anything that is not a mapping is ignored as well.
            if isinstance(config, dict):
                previous_task = None

                # "scripts:" with no value parses to None; treat that the same as a missing key.
                scripts = config.get('scripts') or []

                # Iterate over the scripts in the order they appear in the file.  Each task is handed the one
                # created before it as a dependency, which makes the final task depend on the whole chain.
                for script in scripts:
                    if not self.validate_script_entry(script):
                        log.warning("encountered invalid script entry!")
                        continue

                    new_task = RunVerticaSqlScriptTask(
                        credentials=self.credentials, schema=self.schema, marker_schema=self.marker_schema,
                        date=self.date, read_timeout=self.read_timeout,
                        source_script=path.join(self.script_root, script['location']),
                        script_name=script.get('name'))

                    # If we previously configured a task, set it as a dependency of this one, so it runs prior to.
                    if previous_task is not None:
                        new_task.add_dependency(previous_task)

                    # Mark this as the previously-created task.
                    previous_task = new_task

                self.downstream_task = previous_task

        # If a downstream task has been set, yield it, triggering Luigi to schedule our scripts.
        if self.downstream_task is not None:
            yield self.downstream_task
Пример #5
0
def get_mysql_query_results(credentials, database, query):
    """
    Run ``query`` against the given MySQL ``database`` and return every fetched row.

    ``credentials`` is a URL passed to ``ExternalURL``; its output target is read as JSON and must supply the
    ``username``, ``password``, ``host`` and ``port`` used to connect.  The connection is closed before
    returning, whether or not the query succeeds.
    """
    with ExternalURL(url=credentials).output().open('r') as handle:
        settings = json.load(handle)

    conn = mysql.connector.connect(
        user=settings.get('username'),
        password=settings.get('password'),
        host=settings.get('host'),
        port=settings.get('port'),
        database=database,
    )

    try:
        db_cursor = conn.cursor()
        db_cursor.execute(query)
        return db_cursor.fetchall()
    finally:
        # Runs after fetchall() has produced its value and before that value is handed back.
        conn.close()
Пример #6
0
def get_mysql_query_results(credentials, database, query):
    """
    Execute one MySQL query on ``database`` and hand back all of its rows.

    The JSON document found at the ``credentials`` URL (resolved through ``ExternalURL``) provides the
    ``username``, ``password``, ``host`` and ``port`` connection settings.  The connection is always closed
    once the query has been attempted.
    """
    target = ExternalURL(url=credentials).output()
    with target.open('r') as source:
        login = json.load(source)

    connect_kwargs = {
        'user': login.get('username'),
        'password': login.get('password'),
        'host': login.get('host'),
        'port': login.get('port'),
        'database': database,
    }
    link = mysql.connector.connect(**connect_kwargs)

    try:
        runner = link.cursor()
        runner.execute(query)
        rows = runner.fetchall()
    finally:
        link.close()

    return rows