def run(self) -> str:
        """Run ``self.cmd`` through ``os.popen`` and return its output.

        ``2>&1`` is appended to the command, so the returned text holds both
        stdout and stderr. Output containing the substring ``"Error"`` is
        treated as a failure.

        :returns: the text read from the command.
        :raises RunnerException: if the output contains ``"Error"`` or anything
            else goes wrong while running the command.
        """
        # Bind ``out`` before entering the try block. The handler below reads
        # it, and if ``os.popen`` (or building the command string) raised, the
        # name would otherwise be unbound and the real error would be hidden
        # behind an UnboundLocalError.
        out = ""
        try:
            out = os.popen(self.cmd + " 2>&1").read()

            if "Error" in out:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    17,
                    self.error_msg + ("\n" if out != "" else "") + out,
                )

            RunnerLog(self.task, self.run_id, 17, self.success_msg)

            return out

        # NOTE: this handler also receives the RunnerException raised above
        # and wraps it in a second RunnerException.
        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                17,
                self.error_msg + ("\n" if out != "" else "") + "\n" + str(e),
            )
    def web_url(self, url: str) -> str:
        """Download a web page and use its text as the task query.

        The page text is stored on ``self.query``. When a run id is set or a
        cache refresh was requested, the text is also written to
        ``task.source_cache`` before cleanup. If any step fails while a run id
        is set, source caching is enabled and a cached copy exists, the cached
        copy is used instead.

        :param url: address of the page to download.
        :returns: whatever ``self.cleanup()`` returns for the loaded query.
        :raises RunnerException: if loading fails and the cache is not used.
        """
        try:
            page = requests.get(
                str(url),
                verify=app.config["HTTP_VERIFY_SSL"],  # noqa: S501
            )
            self.query = page.text

            source_conn = self.task.source_database_conn
            if source_conn and source_conn.type_id == 2:
                self.db_type = "mssql"
            else:
                self.db_type = None

            if page.status_code != 200:
                raise ValueError(
                    f"{url} returned bad status: {page.status_code}")

            # store the raw query in the cache before it gets cleaned up.
            if self.run_id or self.refresh_cache:
                self.task.source_cache = self.query
                db.session.commit()

                if self.refresh_cache:
                    RunnerLog(
                        self.task,
                        self.run_id,
                        15,
                        "Source cache manually refreshed.",
                    )

            # cleanup inserts the parameters.
            return self.cleanup()

        # pylint: disable=broad-except
        except BaseException as e:
            cache_allowed = self.run_id and self.task.enable_source_cache == 1

            if not cache_allowed:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    15,
                    f"Failed to get source from {url}\n{e}.",
                )

            if not self.task.source_cache:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    15,
                    f"Failed to get source from {url}. Cache enabled, but no cache available.\n{e}",
                )

            # caching is enabled and a cached copy exists: fall back to it.
            RunnerLog(
                self.task,
                self.run_id,
                15,
                f"Failed to get source from {url}. Using cached query.\nFull trace:\n{e}",
            )

            source_conn = self.task.source_database_conn
            if source_conn and source_conn.type_id == 2:
                self.db_type = "mssql"
            else:
                self.db_type = None

            self.query = self.task.source_cache

            return self.cleanup()
Exemplo n.º 3
0
    def __close(self) -> None:
        """Close the connection held in ``self.conn``.

        :raises RunnerException: if closing the connection fails.
        """
        try:
            self.conn.close()

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                13,
                f"Failed to close connection.\n{e}",
            )
 def __connect(self) -> paramiko.SSHClient:
     """Connect using ``self.connection``.

     :returns: the value returned by ``connect``.
     :raises RunnerException: if connecting fails for any reason.
     """
     try:
         client = connect(self.connection)
     except BaseException as e:
         raise RunnerException(
             self.task,
             self.run_id,
             19,
             f"Failed to connect.\n{e}",
         )
     return client
Exemplo n.º 5
0
    def __connect(self) -> Tuple[Transport, SFTPClient]:
        """Connect using ``self.connection``.

        Only ``ValueError`` is translated; any other exception propagates
        unchanged.

        :returns: the value returned by ``connect``.
        :raises RunnerException: if ``connect`` raises ``ValueError``.
        """
        try:
            connected = connect(self.connection)
        except ValueError as e:
            message = str(e)
            raise RunnerException(self.task, self.run_id, 9, message)
        return connected
    def __build_env(self) -> None:
        """Create the virtual environment at ``self.env_path``.

        Runs command:
        .. code-block:: console

            virtualenv <path>

        :raises RunnerException: if the command cannot be run successfully.
        """
        try:
            create_env = Cmd(
                task=self.task,
                run_id=self.run_id,
                cmd=f'virtualenv "{self.env_path}"',
                success_msg=f"Environment created.\n{self.env_path}",
                error_msg=f"Failed to create environment.\n{self.env_path}",
            )
            create_env.shell()

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                14,
                f"Failed to build environment.\n{self.base_path}\n{e}",
            )
    def string_to_date(self) -> str:
        """Convert ``self.date_string`` piece by piece and return the result.

        The string is cut into pieces so that no ``%x`` parameter appears
        twice inside one piece. Every piece is converted with
        ``self.get_date_part`` and the converted pieces are concatenated. The
        result is also stored back on ``self.date_string``.

        :returns: the converted string.
        :raises RunnerException: if anything fails while parsing.
        """
        def find_params(text: str) -> List[str]:
            """List every ``%`` + letter parameter in ``text``, in order."""
            return re.findall(r"%[a-zA-Z]", text)

        def first_duplicate(params: List[str]) -> Optional[str]:
            """Return the first parameter that shows up a second time."""
            seen = set()
            for item in params:
                if item in seen:
                    return item
                seen.add(item)

            return None

        try:
            remaining = self.date_string
            pieces = []

            repeated = first_duplicate(find_params(remaining))
            while repeated:
                # cut on the first two occurrences of the repeated parameter.
                # ex: 'file_name','%y%m_stuff','%y%m_otherstuff_%y%m'
                head, middle, tail = remaining.split(repeated, 2)

                # everything up to the second occurrence is a finished piece.
                pieces.append(head + repeated + middle)

                # the second occurrence starts the text still to be processed.
                remaining = repeated + tail
                repeated = first_duplicate(find_params(remaining))

            # whatever is left has no duplicate parameters any more.
            pieces.append(remaining)

            converted = [self.get_date_part(piece) for piece in pieces]
            self.date_string = "".join(converted)
            return self.date_string

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                17,
                f"Failed to parse date string.\n{e}",
            )
    def __close(self) -> None:
        """Close the session held in ``self.session``.

        :raises RunnerException: if closing the session fails.
        """
        try:
            self.session.close()

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                19,
                f"Failed to disconnect.\n{e}",
            )
Exemplo n.º 9
0
    def __clean_up(self) -> None:
        """Delete the directory tree at ``self.temp_path`` if the path exists.

        :raises RunnerException: if the check or the removal fails.
        """
        try:
            temp_dir = Path(self.temp_path)
            if temp_dir.exists():
                shutil.rmtree(self.temp_path)

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                8,
                f"Failed to clean up job.\n{e}",
            )
Exemplo n.º 10
0
    def save(self, overwrite: int, file_name: str) -> None:
        """Copy a local file to the FTP server.

        Changes into ``self.connection.path`` (or ``/``) on the server and
        uploads ``self.dir / file_name`` with a ``STOR`` command.

        :param overwrite: when not ``1``, the upload is skipped if
            ``conn.size(file_name)`` succeeds on the server.
        :param file_name: name of the file inside ``self.dir``; also used as
            the name on the server.
        :returns: nothing.
        :raises RunnerException: if changing directory or uploading fails.
        """
        # NOTE(review): the return value of __connect() is discarded and the
        # code below uses self.conn; presumably that is set elsewhere - confirm.
        self.__connect()
        try:
            self.conn.cwd(self.connection.path or "/")

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 13,
                                  f"Failed to change path.\n{e}")

        if overwrite != 1:
            try:
                # a successful size() call is taken to mean the file exists.
                self.conn.size(file_name)
                RunnerLog(
                    self.task,
                    self.run_id,
                    13,
                    "File already exists and will not be loaded.",
                )
                self.__close()

                return

            # Any failure here (including one raised by RunnerLog or
            # __close above) is ignored and the upload goes ahead.
            # pylint: disable=broad-except
            except BaseException:
                pass

        try:
            with open(str(self.dir.joinpath(file_name)), "rb") as file:
                self.conn.storbinary("STOR " + file_name, file)

            RunnerLog(self.task, self.run_id, 13, "File loaded to server.")

            # __close() is only called on the skip and success paths; the
            # failure paths raise without closing the connection.
            self.__close()

        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 13,
                                  f"Failed to save file on server.\n{e}")
    def source(self, query: Optional[str] = None) -> str:
        """Load the task source code and return the cleaned-up version.

        :param query: text to use instead of ``task.source_code``; when falsy
            the task's source code (or an empty string) is used.
        :returns: whatever ``self.cleanup()`` returns.
        :raises RunnerException: if any step fails.
        """
        try:
            self.query = query or self.task.source_code or ""

            source_conn = self.task.source_database_conn
            if source_conn and source_conn.type_id == 2:
                self.db_type = "mssql"
            else:
                self.db_type = None

            return self.cleanup()

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                15,
                f"Failed to clean source code.\n{e}.",
            )
    def read(self, file_name: str) -> List[IO[str]]:
        """Load one or more files from a network file path.

        A ``file_name`` containing ``*`` is treated as a pattern: the folder
        in front of the first ``*`` is walked and every file name that matches
        the pattern is loaded. Otherwise the single named file is loaded.

        :param file_name: file path, optionally containing ``*`` wildcards.
        :returns: a list with the result of ``__load_file`` for each file.
        :raises RunnerException: if searching or loading fails.
        """
        try:
            # no wildcard: load the one file that was asked for.
            if "*" not in file_name:
                return [self.__load_file(file_name)]

            RunnerLog(
                self.task,
                self.run_id,
                10,
                "Searching for matching files...",
            )

            # a smb file name can be a path, but listpath will only list
            # the current folder, so start walking from the folder that
            # precedes the first *.
            base_dir = str(Path(file_name.partition("*")[0]).parent)

            matches = [
                candidate
                for _, _, names in self._walk(base_dir)
                for candidate in names
                if fnmatch.fnmatch(candidate, file_name)
            ]

            RunnerLog(
                self.task,
                self.run_id,
                10,
                "Found %d file%s.\n%s" % (
                    len(matches),
                    ("s" if len(matches) != 1 else ""),
                    "\n".join(matches),
                ),
            )

            # open every file that was found.
            return [self.__load_file(match) for match in matches]

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                10,
                f"File failed to load file from server.\n{e}",
            )
Exemplo n.º 13
0
    def read(self, file_name: str) -> List[IO[str]]:
        """Load one or more files from the FTP server.

        The connection first changes into the cleaned ``self.connection.path``
        (or ``/``). A ``file_name`` containing ``*`` is treated as a pattern:
        the folder in front of the first ``*`` is walked and every file name
        that matches the pattern is loaded. Otherwise the single named file
        is loaded.

        :param file_name: file path, optionally containing ``*`` wildcards.
        :returns: a list with the result of ``__load_file`` for each file.
        :raises RunnerException: if changing path, searching or loading fails.
        """
        try:
            self.conn.cwd(self.__clean_path(self.connection.path or "/"))

            # no wildcard: load the one file that was asked for.
            if "*" not in file_name:
                return [self.__load_file(file_name)]

            RunnerLog(
                self.task,
                self.run_id,
                13,
                "Searching for matching files...",
            )

            # start walking from the folder that precedes the first *
            base_dir = str(Path(file_name.partition("*")[0]).parent)

            matches = [
                candidate
                for _, _, names in self._walk(base_dir)
                for candidate in names
                if fnmatch.fnmatch(candidate, file_name)
            ]

            RunnerLog(
                self.task,
                self.run_id,
                13,
                "Found %d file%s.\n%s" % (
                    len(matches),
                    ("s" if len(matches) != 1 else ""),
                    "\n".join(matches),
                ),
            )
            return [self.__load_file(match) for match in matches]

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                13,
                f"File failed to load file from server.\n{e}",
            )
    def __connect(self) -> SMBConnection:
        """Connect to the SMB server.

        The username, password, server name and server ip are passed to
        ``connect`` as strings. Only ``ValueError`` is translated; any other
        exception propagates unchanged.

        :returns: the value returned by ``connect``.
        :raises RunnerException: if ``connect`` raises ``ValueError``.
        """
        try:
            credentials = (
                str(self.username),
                str(self.password),
                str(self.server_name),
                str(self.server_ip),
            )
            return connect(*credentials)
        except ValueError as e:
            message = str(e)
            raise RunnerException(self.task, self.run_id, 10, message)
    def __run_script(self) -> None:
        """Run ``self.script`` with the environment's python interpreter.

        The names of all ``self.source_files`` are appended to the command as
        quoted arguments. The command output is stored on ``self.output``.

        :raises RunnerException: if building or running the command fails.
        """
        try:
            interpreter_and_script = (
                f'"{self.env_path}/bin/python" "{self.job_path}/{self.script}" '
            )
            # data files, when there are any, are handed over as arguments.
            quoted_files = " ".join([f'"{x.name}"' for x in self.source_files])
            cmd = interpreter_and_script + quoted_files

            runner = Cmd(
                task=self.task,
                run_id=self.run_id,
                cmd=cmd,
                success_msg="Script successfully run.",
                error_msg="Failed run script: " + "\n" + cmd,
            )
            self.output = runner.shell()

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                14,
                f"Failed to build run script.\n{self.base_path}\n{e}",
            )
Exemplo n.º 16
0
    def save(self, overwrite: int, file_name: str) -> None:
        """Copy a local file to the server using ``self.conn``.

        Changes into the cleaned ``self.connection.path`` (or ``/``) and
        uploads ``self.dir / file_name`` with ``put(..., confirm=True)``.

        :param overwrite: when not ``1``, the upload is skipped if
            ``conn.stat(file_name)`` succeeds on the server.
        :param file_name: name of the file inside ``self.dir``; also used as
            the name on the server.
        :returns: nothing.
        :raises RunnerException: if changing directory or uploading fails.
        """
        try:
            self.conn.chdir(self.__clean_path(self.connection.path or "/"))

        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 9,
                                  f"Failed to change path.\n{e}")

        if overwrite != 1:
            try:
                # a successful stat() call is taken to mean the file exists.
                self.conn.stat(file_name)
                RunnerLog(
                    self.task,
                    self.run_id,
                    9,
                    "File already exists and will not be loaded.",
                )

                self.__close()

                return

            except BaseException:
                # continue if the file does not exist. Note that a failure in
                # RunnerLog or __close above is swallowed here as well.
                pass

        try:
            #  some sftp servers do not allow overwrites. When attempted they
            #  return a permission error or other. So we log if the file exists
            #  to help with debugging.
            try:
                self.conn.stat(file_name)
                RunnerLog(
                    self.task,
                    self.run_id,
                    9,
                    "File already exist. Attempting to overwrite.",
                )

            # pylint: disable=broad-except
            except BaseException:
                # continue if the file does not exist.
                pass

            self.conn.put(str(self.dir.joinpath(file_name)),
                          file_name,
                          confirm=True)

            # file is now confirmed on server w/ confirm=True flag
            RunnerLog(
                self.task,
                self.run_id,
                9,
                f"{file_size(self.conn.stat(file_name).st_size or 0)} stored on server as {file_name}.",
            )

            # __close() is only called on the skip and success paths; the
            # failure paths raise without closing the connection.
            self.__close()

        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 9,
                                  f"Failed to save file on server.\n{e}")
    def gitlab(self, url: str) -> str:
        """Download source code from GitLab through its authenticated API.

        The "raw"/"blob" ``url`` is taken apart into project, branch and file
        path, which are used to build a repository-files API request against
        ``GIT_URL``. ``.sql`` files become the task query (and are cached when
        a run id is set or a refresh was requested); any other file is
        returned as plain text. If a step fails while a run id is set, source
        caching is enabled and a cached copy exists, the cached copy is used.

        :param url: web address of the file; urls containing ``.git`` yield
            an empty string.
        :returns: the cleaned query for ``.sql`` files, otherwise the file
            text or a placeholder when the response is an HTML page.
        :raises RunnerException: if no url is given, or loading fails and the
            cache is not used.
        """
        # pylint: disable=too-many-statements
        if ".git" in str(url):
            return ""

        if not url:
            raise RunnerException(
                self.task,
                self.run_id,
                15,
                "No url specified to get source from.",
            )

        def requote(pattern: str) -> str:
            """Take the first ``pattern`` match in ``url`` and re-encode it."""
            found = re.findall(pattern, url)[0]
            return urllib.parse.quote(urllib.parse.unquote(found), safe="")

        try:
            # convert the "raw" url into an api url
            branch = requote(r"\/(?:raw|blob)\/(.+?)\/")
            project = requote(r"\.(?:com|net|org)\/(.+?)\/-")
            file_path = requote(r"\/(?:raw|blob)\/.+?\/(.+?)$")

            api_url = "%sapi/v4/projects/%s/repository/files/%s/raw?ref=%s" % (
                app.config["GIT_URL"],
                project,
                file_path,
                branch,
            )

            headers = {
                "PRIVATE-TOKEN": app.config["GIT_TOKEN"],
                "Connection": "close",
            }
            page = requests.get(
                api_url,
                verify=app.config["GIT_VERIFY_SSL"],  # noqa: S501
                headers=headers,
            )

            if page.status_code != 200:
                raise Exception("Failed to get code: " + page.text)

            if not url.lower().endswith(".sql"):
                # anything that is not sql is handed back as it is, unless
                # the response is an html page.
                if page.text.startswith("<!DOCTYPE"):
                    return "Visit URL to view code"
                return page.text

            self.query = page.text

            source_conn = self.task.source_database_conn
            if source_conn and source_conn.type_id == 2:
                self.db_type = "mssql"
            else:
                self.db_type = None

            # store the raw query in the cache before it gets cleaned up.
            if self.run_id or self.refresh_cache:
                self.task.source_cache = self.query
                db.session.commit()

                if self.refresh_cache:
                    RunnerLog(
                        self.task,
                        self.run_id,
                        15,
                        "Source cache manually refreshed.",
                    )

            # cleanup inserts the parameters.
            return self.cleanup()

        # pylint: disable=broad-except
        except BaseException as e:
            # the cache is only used when there is a run id. Otherwise the
            # failure comes from a code preview.
            cache_allowed = self.run_id and self.task.enable_source_cache == 1

            if not cache_allowed:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    15,
                    f"Failed to get source from {url}.\n{e}",
                )

            if not self.task.source_cache:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    15,
                    f"Failed to get source from {url}. Cache enabled, but no cache available.\n{e}",
                )

            RunnerLog(
                self.task,
                self.run_id,
                15,
                f"Failed to get source from {url}. Using cached query.\nFull trace:\n{e}",
            )

            source_conn = self.task.source_database_conn
            if source_conn and source_conn.type_id == 2:
                self.db_type = "mssql"
            else:
                self.db_type = None

            self.query = self.task.source_cache
            return self.cleanup()
Exemplo n.º 18
0
    def __connect(self) -> FTP:
        """Connect using ``self.connection``.

        Only ``ValueError`` is translated; any other exception propagates
        unchanged.

        :returns: the value returned by ``connect``.
        :raises RunnerException: if ``connect`` raises ``ValueError``.
        """
        try:
            connected = connect(self.connection)
        except ValueError as e:
            message = str(e)
            raise RunnerException(self.task, self.run_id, 13, message)
        return connected
    def run(self) -> None:
        """Run an SSH Command.

        First, this will make a connection then run the command
        Some code from https://stackoverflow.com/a/32758464 - thanks!

        stdout and stderr of the command are collected while the channel is
        open. The command counts as failed when its exit status is not 0 or
        anything was written to stderr. On success the output is logged; it
        is not returned.

        :returns: nothing.
        :raises RunnerException: if the command fails or any step raises.
        """
        # NOTE(review): the return value of __connect() is discarded and the
        # code below uses self.session; presumably that is set elsewhere -
        # confirm.
        self.__connect()
        # seconds; passed to both exec_command and select below.
        timeout = 600
        try:
            RunnerLog(
                self.task,
                self.run_id,
                19,
                "Starting command.",
            )
            # pylint: disable=W0612
            stdin, stdout, stderr = self.session.exec_command(  # noqa: S601
                self.command, timeout=timeout
            )

            channel = stdout.channel

            # nothing is sent to the command, so close the write side.
            stdin.close()
            channel.shutdown_write()

            stderr_data = b""
            stdout_data = b""

            # keep reading until the channel is closed and both buffers
            # have been drained.
            while (
                not channel.closed
                or channel.recv_ready()
                or channel.recv_stderr_ready()
            ):
                got_chunk = False
                # wait (up to `timeout` seconds) for the channel to become
                # readable.
                readq, _, _ = select.select([stdout.channel], [], [], timeout)

                for chunk in readq:
                    if chunk.recv_ready():
                        stdout_data += stdout.channel.recv(len(chunk.in_buffer))
                        got_chunk = True
                    if chunk.recv_stderr_ready():
                        stderr_data += stderr.channel.recv_stderr(
                            len(chunk.in_stderr_buffer)
                        )
                        got_chunk = True

                    if (
                        not got_chunk
                        and stdout.channel.exit_status_ready()
                        and not stderr.channel.recv_stderr_ready()
                        and not stdout.channel.recv_ready()
                    ):
                        # indicate that we're not going to read from this channel anymore
                        stdout.channel.shutdown_read()
                        # close the channel
                        stdout.channel.close()
                        break  # exit as remote side is finished and our buffers are empty

                time.sleep(0.01)

                # NOTE(review): no overall timeout is enforced by this loop;
                # when select returns an empty list the loop simply runs
                # again.

            # empty output is reported as the text "None".
            out = stdout_data.decode("utf-8") or "None"
            err = stderr_data.decode("utf-8") or "None"

            if stdout.channel.recv_exit_status() != 0 or stderr_data != b"":
                raise ValueError(
                    f"Command stdout: {out}\nCommand stderr: {err}",
                )

            RunnerLog(
                self.task,
                self.run_id,
                19,
                f"Command output:\n{out}",
            )

        except BaseException as e:
            raise RunnerException(
                self.task, self.run_id, 19, f"Failed to run command.\n{e}"
            )

        # only reached on success; the failure path above raises without
        # closing the session.
        self.__close()
    def shell(self) -> str:
        """Run ``self.cmd`` through the shell and return its decoded output.

        stderr is merged into stdout. Output containing the substring
        ``"Error"`` is treated as a failure even when the exit status is 0.
        Anything between a ``:`` and an ``@`` in the error text (for example
        the password in ``user:password@host``) is replaced with ``*****``.

        :returns: the utf-8 decoded output of the command.
        :raises RunnerException: if the command exits with a non-zero status,
            its output contains ``"Error"``, or anything else goes wrong.
        """
        def mask(text: str) -> str:
            """Replace whatever sits between a ``:`` and an ``@`` with stars."""
            return re.sub(
                r"(?<=:)([^:]+?)(?=@)",
                "*****",
                text,
                flags=re.IGNORECASE | re.MULTILINE,
            )

        # Bind ``out`` before entering the try block. The catch-all handler
        # reads it, and if ``check_output`` or ``decode`` raised something
        # other than CalledProcessError the name would otherwise be unbound
        # and the real error would be hidden behind an UnboundLocalError.
        out = ""
        try:
            out_bytes = subprocess.check_output(
                self.cmd, stderr=subprocess.STDOUT, shell=True
            )
            out = out_bytes.decode("utf-8")

            if "Error" in out:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    17,
                    self.error_msg + ("\n" if out != "" else "") + mask(out),
                )

            RunnerLog(
                self.task,
                self.run_id,
                17,
                self.success_msg + (("\n" + out) if out != "" else ""),
            )

            return out

        except subprocess.CalledProcessError as e:
            out = e.output.decode("utf-8")
            # the command output is masked here as well, so the failure
            # message cannot carry text that the "Error" branch would hide.
            raise RunnerException(
                self.task,
                self.run_id,
                17,
                self.error_msg
                + (("\n" + mask(out)) if out != "" else "")
                + "\n"
                + mask(str(e)),
            )

        # NOTE: this handler also receives the RunnerException raised in the
        # try block above and wraps it, which is why ``out`` has to be
        # masked here too.
        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                17,
                "Command failed.\n"
                + (("\n" + mask(out)) if out != "" else "")
                + "\n"
                + mask(str(e)),
            )
Exemplo n.º 21
0
    def save(self) -> Tuple[str, str, str]:
        """Create and save the output file.

        Steps, in order:

        1. Work out the file name from ``task.destination_file_name`` (with
           parameters and dates inserted) or fall back to the name of
           ``self.data_file``; append the file type extension unless the
           destination type is 4.
        2. Copy the rows of ``self.data_file`` into the new file, using a csv
           writer set up for the destination file type.
        3. Optionally encrypt the file with gpg (``task.file_gpg == 1``).
        4. Feed the resulting file into ``self.file_hash``.
        5. Optionally wrap the file in a zip archive
           (``task.destination_create_zip == 1``).

        :returns: a tuple ``(file_name, file_path, hash hexdigest)`` of the
            final file.
        :raises RunnerException: if gpg encryption fails.
        """
        if (self.task.destination_file_name is None
                or self.task.destination_file_name == ""):
            RunnerLog(
                self.task,
                self.run_id,
                11,
                f"No filename specified, {Path(self.data_file.name).name} will be used.",
            )

        if (self.task.destination_file_name != ""
                and self.task.destination_file_name is not None):

            # insert params
            self.file_name = self.params.insert_file_params(
                self.task.destination_file_name.strip())

            # parse python dates
            self.file_name = DateParsing(self.task, self.run_id,
                                         self.file_name).string_to_date()

        else:
            self.file_name = Path(self.data_file.name).name

        # 4 is other
        if self.task.destination_file_type_id != 4 and self.task.file_type is not None:
            self.file_name += "." + (self.task.file_type.ext or "csv")

        self.file_path = str(Path(self.base_path).joinpath(self.file_name))

        # if the source name matches the destination name, rename the source and update tmp file name.
        if self.data_file.name == self.file_path:
            data_file_as_path = Path(self.data_file.name)
            new_data_file_name = str(
                data_file_as_path.parent /
                (data_file_as_path.stem + "_tmp" + data_file_as_path.suffix))
            os.rename(self.data_file.name, new_data_file_name)
            self.data_file.name = new_data_file_name  # type: ignore[misc]

        # the source data is read as csv and re-written row by row.
        with open(self.data_file.name, "r", newline="") as data_file:
            reader = csv.reader(data_file)

            with open(self.file_path, mode="w") as myfile:
                # if csv (1), text (2) or other (4) and the delimiter is not ignored

                if (self.task.destination_file_type_id == 1
                        or self.task.destination_file_type_id == 2
                        or self.task.destination_file_type_id == 4) and (
                            self.task.destination_ignore_delimiter is None
                            or self.task.destination_ignore_delimiter != 1):
                    # a custom delimiter is only applied to txt (2) and other
                    # (4) files; escape sequences in it (e.g. "\t") are
                    # decoded first.
                    wrtr = (
                        csv.writer(
                            myfile,
                            delimiter=str(self.task.destination_file_delimiter)
                            .encode("utf-8").decode("unicode_escape"),
                            quoting=self.__quote_level(),
                        ) if self.task.destination_file_delimiter is not None
                        and len(self.task.destination_file_delimiter) > 0 and
                        (self.task.destination_file_type_id == 2
                         or self.task.destination_file_type_id == 4
                         )  # txt or other
                        else csv.writer(
                            myfile,
                            quoting=self.__quote_level(),
                        ))
                    for row in reader:
                        # strip surrounding double and single quotes from every value.
                        new_row = [(x.strip('"').strip("'") if isinstance(
                            x, str) else x) for x in row]

                        # a configured line terminator is added as an extra
                        # last column of each row.
                        if (self.task.destination_file_type_id == 1
                                or self.task.destination_file_type_id == 2
                                or self.task.destination_file_type_id == 4
                            ) and (self.task.destination_file_line_terminator
                                   is not None and
                                   self.task.destination_file_line_terminator
                                   != ""):
                            new_row.append(
                                self.task.destination_file_line_terminator)

                        wrtr.writerow(new_row)

                # if xlsx (3): written with the csv "excel" dialect
                elif self.task.destination_file_type_id == 3:
                    wrtr = csv.writer(
                        myfile,
                        dialect="excel",
                        quoting=self.__quote_level(),
                    )
                    for row in reader:
                        new_row = [(x.strip('"').strip("'") if isinstance(
                            x, str) else x) for x in row]
                        wrtr.writerow(new_row)

                # anything else: copy the data file line by line, unchanged.
                else:
                    for line in data_file:
                        myfile.write(line)

        RunnerLog(
            self.task,
            self.run_id,
            11,
            f"File {self.file_name} created. Size: {file_size(Path(self.file_path).stat().st_size)}.\n{self.file_path}",
        )

        # encrypt file
        if self.task.file_gpg == 1:
            gpg = gnupg.GPG("/usr/local/bin/gpg")

            # import the key
            keychain = gpg.import_keys(
                em_decrypt(self.task.file_gpg_conn.key,
                           app.config["PASS_KEY"]))

            # set it to trusted
            gpg.trust_keys(keychain.fingerprints, "TRUST_ULTIMATE")

            # encrypt file
            with open(self.file_path, "rb") as my_file:
                encrypt_status = gpg.encrypt_file(
                    file=my_file,
                    recipients=keychain.fingerprints,
                    output=self.file_path + ".gpg",
                )

            # remove key (done before the status check, so it also happens
            # when encryption failed)
            gpg.delete_keys(keychain.fingerprints)

            # stop here if the encryption did not succeed
            if not encrypt_status.ok:
                raise RunnerException(
                    self.task,
                    self.run_id,
                    11,
                    "File failed to encrypt.\n%s\n%s\n%s" % (
                        self.file_path,
                        encrypt_status.status,
                        encrypt_status.stderr,
                    ),
                )

            # update global file name
            self.file_path = self.file_path + ".gpg"
            self.file_name = self.file_name + ".gpg"

            RunnerLog(
                self.task,
                self.run_id,
                11,
                "File encrypted.\n%s\n%s\n%s" %
                (self.file_path, encrypt_status.status, encrypt_status.stderr),
            )

        # get file hash.. after encrypting
        # NOTE(review): the log message below calls this an md5 hash;
        # self.file_hash is created elsewhere - confirm the algorithm.
        with open(self.file_path, "rb") as my_file:
            while True:
                chunk = my_file.read(8192)
                if not chunk:
                    break
                self.file_hash.update(chunk)

        RunnerLog(self.task, self.run_id, 11,
                  f"File md5 hash: {self.file_hash.hexdigest()}")

        # create zip
        if self.task.destination_create_zip == 1:

            self.zip_name = DateParsing(
                self.task, self.run_id,
                str(self.task.destination_zip_name)).string_to_date()

            # parse params
            self.zip_name = self.params.insert_file_params(self.zip_name)

            # make sure the name ends with exactly one ".zip"
            self.zip_name = self.zip_name.replace(".zip", "") + ".zip"

            with zipfile.ZipFile(
                    str(Path(self.base_path).joinpath(self.zip_name)),
                    "w") as zip_file:
                zip_file.write(
                    self.file_path,
                    compress_type=zipfile.ZIP_DEFLATED,
                    arcname=self.file_name,
                )

            # now we change all file stuff to our zip.

            self.file_name = self.zip_name
            self.file_path = str(Path(self.base_path).joinpath(self.zip_name))

            RunnerLog(self.task, self.run_id, 11,
                      f"ZIP archive created.\n{self.file_path}")

        return self.file_name, self.file_path, self.file_hash.hexdigest()
Exemplo n.º 22
0
    def __init__(self, task_id: int) -> None:
        """Execute one complete run of the task identified by ``task_id``.

        The whole run happens inside the constructor: a run id is generated,
        the task is flagged as running, source data is loaded, optionally
        processed, stored and reported by email, and the temporary files are
        cleaned up. When the task's project runs its tasks as a sequence, the
        next enabled task is triggered through the runner host afterwards.

        On sequence jobs, only the first enabled job in the
        sequence should be in the scheduler.

        Raises:
            RunnerException: when ``system_monitor`` raises ``ValueError``.
        """
        # The run id is the first 10 hex chars of a hash of "now" in ms.
        digest = hashlib.sha256(str(time.time() * 1000).encode("utf-8"))
        self.run_id = digest.hexdigest()[:10]

        task = Task.query.filter_by(id=task_id).first()

        self.output_files: List[str] = []

        print("starting task " + str(task.id))  # noqa: T001
        logging.info(
            "Runner: Starting task: %s, with run: %s",
            str(task.id),
            str(self.run_id),
        )

        # Flag the task as running before any work is done.
        task.status_id = 1
        task.last_run_job_id = self.run_id
        task.last_run = datetime.datetime.now()
        db.session.commit()

        RunnerLog(task, self.run_id, 8, "Starting task!")

        self.task = task

        # A failing system monitor cancels the run.
        try:
            system_monitor()
        except ValueError as message:
            raise RunnerException(self.task, self.run_id, 18, message)

        # Per-run scratch folder: temp/<project>/<task>/<run id>.
        scratch_root = Path(__file__).parent.parent / "temp"
        self.temp_path = Path(scratch_root /
                              sanitize_filename(self.task.project.name) /
                              sanitize_filename(self.task.name) / self.run_id)
        self.temp_path.mkdir(parents=True, exist_ok=True)

        RunnerLog(task, self.run_id, 8, "Loading parameters...")

        self.param_loader = ParamLoader(self.task, self.run_id)

        # Load a file / run a query / etc. to get some data to work with.
        self.query_output_size: Optional[int] = None
        self.source_loader = SourceCode(self.task, self.run_id,
                                        self.param_loader)
        self.source_files: List[IO[str]] = []
        self.__get_source()

        # Optional post-processing of the loaded data.
        if self.task.processing_type_id is not None:
            self.__process()

        # Store output, send confirmation/error emails, then remove the
        # files from local storage.
        self.__store_files()
        self.__send_email()
        self.__clean_up()

        RunnerLog(self.task, self.run_id, 8, "Completed task!")

        # The run succeeded, so any retry tracking is dropped.
        redis_client.delete(f"runner_{task_id}_attempt")
        # NOTE(review): status 4 is presumably "completed" - confirm.
        task.status_id = 4
        elapsed = datetime.datetime.now() - task.last_run
        task.est_duration = elapsed.total_seconds()

        # On a sequence job, hand over to the next enabled task.
        if task.project.sequence_tasks == 1:
            enabled_tasks = (Task.query.filter_by(enabled=1).filter_by(
                project_id=task.project_id).order_by(
                    Task.order.asc(),
                    Task.name.asc())  # type: ignore[union-attr]
                             .all())
            ordered_ids = [row.id for row in enabled_tasks]

            # The task may have been disabled while it was running and so
            # be missing from the list; in that case nothing is triggered.
            if task.id in ordered_ids:
                position = ordered_ids.index(task.id)
                # A slice (not an index) so the last task yields [].
                following = ordered_ids[position + 1:position + 2]

                if not following:
                    RunnerLog(self.task, self.run_id, 8, "Sequence completed!")
                else:
                    RunnerLog(
                        self.task,
                        self.run_id,
                        8,
                        f"Triggering run of next sequence job: {following}.",
                    )

                    next_task = Task.query.filter_by(id=following[0]).first()

                    RunnerLog(
                        next_task,
                        None,
                        8,
                        f"Run triggered by previous sequence job: {task.id}.",
                    )

                    requests.get(app.config["RUNNER_HOST"] + "/" +
                                 str(following[0]))

        task.last_run_job_id = None
        task.last_run = datetime.datetime.now()
        db.session.commit()
Exemplo n.º 23
0
    def __send_email(self) -> None:
        """Send the completion email for this run when the task asks for one.

        The email is sent only when ``email_completion`` is 1 and, if
        ``email_error`` is also 1, the run logged no errors. Output files can
        be attached and optionally embedded in the message as CSV rows. If the
        task is set not to send empty files and any output file has size 0,
        no email is sent.

        Raises:
            RunnerException: when the email template cannot be loaded.
        """
        run_filter = {"task_id": self.task.id, "job_id": self.run_id}

        # Full run log, newest entry first.
        logs = (
            TaskLog.query.filter_by(**run_filter).order_by(
                TaskLog.status_date.desc())  # type: ignore[union-attr]
            .all())

        # Error entries only, in default status_date order.
        error_logs = (TaskLog.query.filter_by(**run_filter, error=1).order_by(
            TaskLog.status_date).all())

        date = str(datetime.datetime.now())

        # pylint: disable=broad-except
        try:
            template = env.get_template("email/email.html.j2")
        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 8,
                                  f"Failed to get email template.\n{e}")

        # Success email: not requested at all.
        if self.task.email_completion != 1:
            return

        # Error emails are enabled and this run logged errors, so no
        # completion email goes out.
        if self.task.email_error == 1 and len(error_logs) >= 1:
            return

        RunnerLog(self.task, self.run_id, 8, "Sending completion email.")

        output: List[List[str]] = []
        attachments: List[str] = []

        if self.task.email_completion_file == 1 and len(self.output_files) > 0:
            found_empty = False

            for attachment in self.output_files:
                if self.task.email_completion_file_embed == 1:
                    with open(attachment, newline="") as handle:
                        output.extend(csv.reader(handle))

                # Check the attachment size if the task should not send
                # blank files.
                if (self.task.email_completion_dont_send_empty_file == 1
                        and attachment
                        and os.path.getsize(attachment) == 0):
                    found_empty = True

                attachments.append(attachment)

            if found_empty:
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    "Not sending completion email, file is empty.",
                )
                return

        subject = (f"Project: {self.task.project.name} / "
                   f"Task: {self.task.name} / "
                   f"Run: {self.run_id} {date}")

        Smtp(
            task=self.task,
            run_id=self.run_id,
            recipients=self.task.email_completion_recipients,
            subject=subject,
            message=template.render(
                task=self.task,
                success=1,
                date=date,
                logs=logs,
                output=output,
                host=app.config["WEB_HOST"],
            ),
            short_message=self.task.email_completion_message
            or f"Atlas Hub job {self.task} completed successfully.",
            attachments=attachments,
        )
Exemplo n.º 24
0
    def __store_files(self) -> None:
        """Save every source file and push it to the task's destinations.

        For each file in ``self.source_files`` a data file is created through
        ``File(...).save()``, uploaded to each enabled destination (SFTP, FTP,
        SMB), stored once more as a historical copy, and recorded as a
        ``TaskFile`` row. A destination is skipped when it is set not to send
        empty files and the source size is 0.

        Raises:
            RunnerException: when the data file cannot be created.
        """
        if not self.source_files:
            return

        plural = "" if len(self.source_files) == 1 else "s"
        RunnerLog(
            self.task,
            self.run_id,
            8,
            f"Storing output file{plural}...",
        )

        # (task attribute infix, label used in the log, uploader class)
        destinations = (
            ("sftp", "SFTP", Sftp),
            ("ftp", "FTP", Ftp),
            ("smb", "SMB", Smb),
        )

        for position, source in enumerate(self.source_files, 1):

            # A query reports its own output size; otherwise use the size
            # of the file on disk.
            if self.query_output_size is not None:
                source_size = self.query_output_size
            else:
                source_size = Path(source.name).stat().st_size

            # Get the file name. If no name is specified in the task
            # settings, the temp name is used.
            try:
                file_name, file_path, file_hash = File(
                    task=self.task,
                    run_id=self.run_id,
                    data_file=source,
                    params=self.param_loader,
                ).save()

            except BaseException as e:
                raise RunnerException(self.task, self.run_id, 11,
                                      f"Failed to create data file.\n{e}")

            self.output_files.append(file_path)

            if len(self.source_files) > 1:
                RunnerLog(
                    self.task,
                    self.run_id,
                    8,
                    f"Storing file {position} of {len(self.source_files)}...",
                )

            for key, label, uploader in destinations:
                # The destination must be switched on and have a connection.
                if not (getattr(self.task, f"destination_{key}") == 1
                        and getattr(self.task, f"destination_{key}_conn")):
                    continue

                skip_empty = getattr(
                    self.task, f"destination_{key}_dont_send_empty_file")
                if skip_empty == 1 and source_size == 0:
                    RunnerLog(
                        self.task,
                        self.run_id,
                        8,
                        f"Skipping {label}, file is empty.",
                    )
                    continue

                uploader(
                    task=self.task,
                    run_id=self.run_id,
                    connection=getattr(self.task, f"destination_{key}_conn"),
                    directory=self.temp_path,
                ).save(
                    overwrite=getattr(self.task,
                                      f"destination_{key}_overwrite"),
                    file_name=file_name,
                )

            # save historical copy
            history_path = Smb(
                task=self.task,
                run_id=self.run_id,
                connection=None,  # "default",
                directory=self.temp_path,
            ).save(overwrite=1, file_name=file_name)

            # log file details
            record = TaskFile(
                name=file_name,
                path=history_path,
                task_id=self.task.id,
                job_id=self.run_id,
                file_hash=file_hash,
                size=file_size(str(os.path.getsize(file_path))),
            )
            db.session.add(record)
            db.session.commit()
Exemplo n.º 25
0
    def __process(self) -> None:
        """Obtain the task's processing script and run it on the source files.

        Where the script comes from depends on ``task.processing_type_id``:

        1 = smb
        2 = sftp
        3 = ftp
        4 = git url
        5 = other url
        6 = source code

        For types 1-3 the script is read from the processing connection. For
        types 4 and 5 a non-empty ``processing_command`` means the whole repo
        is cloned into the run's temp folder and the command names the script
        to run; otherwise a single script is downloaded. Downloaded or inline
        code is written to ``<temp_path>/<run_id>.py`` and then executed
        through ``PyProcesser``.

        Raises:
            RunnerException: when the repo cannot be cloned, when there is
                not enough information to locate a script, or when the script
                file cannot be written.
        """
        RunnerLog(self.task, self.run_id, 8, "Starting processing script...")

        # Where a downloaded/inline script is written. This stays a ``Path``
        # for the whole method because ``.name`` is read from it below.
        processing_script_name = self.temp_path / (self.run_id + ".py")

        my_file = ""
        if (self.task.processing_type_id == 1
                and self.task.processing_smb_id is not None):
            # Use the processing file here, not the task's source file.
            file_name = self.param_loader.insert_file_params(
                self.task.processing_smb_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            my_file = Path(
                Smb(
                    task=self.task,
                    run_id=self.run_id,
                    directory=self.temp_path,
                    connection=self.task.processing_smb_conn,
                ).read(file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 2
              and self.task.processing_sftp_id is not None):
            file_name = self.param_loader.insert_file_params(
                self.task.processing_sftp_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            my_file = Path(
                Sftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.processing_sftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif (self.task.processing_type_id == 3
              and self.task.processing_ftp_id is not None):
            file_name = self.param_loader.insert_file_params(
                self.task.processing_ftp_file)
            file_name = DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=file_name,
            ).string_to_date()

            # Use the processing connection here, not the source connection.
            my_file = Path(
                Ftp(
                    task=self.task,
                    run_id=self.run_id,
                    connection=self.task.processing_ftp_conn,
                    directory=self.temp_path,
                ).read(file_name=file_name)[0].name).read_text("utf8")

        elif self.task.processing_type_id == 4 and self.task.processing_git is not None:

            # If a command is specified, download the whole repo.
            if self.task.processing_command:
                try:
                    # Insert the url-quoted git credentials right after the
                    # scheme: https://<username>:<password>@host/...
                    url = (re.sub(
                        r"(https?://)(.+?)",
                        r"\1<username>:<password>@\2",
                        self.task.processing_git,
                        flags=re.IGNORECASE,
                    ).replace("<username>",
                              urllib.parse.quote(
                                  app.config["GIT_USERNAME"])).replace(
                                      "<password>",
                                      urllib.parse.quote(
                                          app.config["GIT_PASSWORD"])))

                    cmd = (
                        "$(which git) clone -q --depth 1 " +
                        '--recurse-submodules --shallow-submodules %s "%s"' %
                        (url, str(self.temp_path)))

                    Cmd(
                        self.task,
                        self.run_id,
                        cmd,
                        "Repo cloned.",
                        "Failed to clone repo: %s" %
                        (self.task.processing_git, ),
                    ).shell()

                # pylint: disable=broad-except
                except BaseException as e:
                    raise RunnerException(
                        self.task, self.run_id, 8,
                        "Processor failed to clone repo.") from e

            # otherwise get py file
            else:
                my_file = self.source_loader.gitlab(self.task.processing_git)

        elif self.task.processing_type_id == 5 and self.task.processing_url is not None:
            # Same rule as the git branch: an empty command means "download a
            # single script". Cloning with an empty command used to crash
            # later, because the script name had been rebound to a ``str``.
            if self.task.processing_command:
                try:

                    cmd = (
                        "$(which git) clone -q --depth 1 " +
                        '--recurse-submodules --shallow-submodules %s "%s"' %
                        (self.task.processing_url, str(self.temp_path)))

                    Cmd(
                        task=self.task,
                        run_id=self.run_id,
                        cmd=cmd,
                        success_msg="Repo cloned",
                        error_msg="Failed to clone repo: %s" %
                        (self.task.processing_url, ),
                    ).shell()

                # pylint: disable=broad-except
                except BaseException as e:
                    raise RunnerException(
                        self.task, self.run_id, 8,
                        "Processor failed to clone repo.") from e
            else:
                my_file = self.source_loader.web_url(self.task.processing_url)

        elif (self.task.processing_type_id == 6
              and self.task.processing_code is not None):
            my_file = self.task.processing_code
        elif self.task.processing_type_id > 0:
            raise RunnerException(
                self.task,
                self.run_id,
                8,
                "Processing error, Not enough information to run a processing script from.",
            )

        try:
            if my_file != "" and self.task.processing_type_id > 0:
                Path(processing_script_name).parent.mkdir(parents=True,
                                                          exist_ok=True)
                # Remote scripts are read as UTF-8 above and Python reads
                # source files as UTF-8, so write with the same encoding
                # instead of the locale default.
                with open(processing_script_name, "w",
                          encoding="utf8") as text_file:
                    text_file.write(my_file)
                RunnerLog(self.task, self.run_id, 8,
                          "Processing script created.")

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 8,
                                  f"Processing script failure:\n{e}") from e

        # Run the processing script. With a cloned repo the command names
        # the script; otherwise the file written above is run.
        output = PyProcesser(
            task=self.task,
            run_id=self.run_id,
            directory=self.temp_path,
            source_files=self.source_files,
            script=self.task.processing_command or processing_script_name.name,
        ).run()

        # allow processer to rename file
        if output:
            RunnerLog(self.task, self.run_id, 8,
                      f"Processing script output:\n{output}")
            self.data_files = output
Exemplo n.º 26
0
    def __get_source(self) -> None:
        """Load the task's source data according to ``task.source_type_id``.

        1 runs a SQL query (Postgres or SQL Server, chosen by the database
        type id of the source connection) and stores the reported output size
        and the result files. 2, 3 and 4 read a file over SMB, SFTP and FTP
        into ``self.source_files``. 6 runs the loaded query text as an SSH
        command. Any other source type does nothing.

        Raises:
            RunnerException: when the query cannot be loaded or fails to run.
        """

        def resolve_file_name(template):
            # Fill in the task parameters first, then the date placeholders.
            with_params = self.param_loader.insert_file_params(template)
            return DateParsing(
                task=self.task,
                run_id=self.run_id,
                date_string=with_params,
            ).string_to_date()

        if self.task.source_type_id == 1:  # sql

            external_db = self.task.source_database_conn
            try:
                RunnerLog(self.task, self.run_id, 8, "Loading query...")
                query = self.__get_query()
            except BaseException as e:
                raise RunnerException(self.task, self.run_id, 8,
                                      f"Failed to load query.\n{e}")

            RunnerLog(self.task, self.run_id, 8,
                      "Starting query run, waiting for results...")

            # database type id -> (query runner, status id used on failure)
            engines = {
                1: (Postgres, 21),  # postgres
                2: (SqlServer, 20),  # mssql
            }
            engine = engines.get(external_db.database_type.id)

            if engine is not None:
                runner, failure_status = engine
                try:
                    self.query_output_size, self.source_files = runner(
                        task=self.task,
                        run_id=self.run_id,
                        connection=em_decrypt(external_db.connection_string,
                                              app.config["PASS_KEY"]),
                        timeout=external_db.timeout
                        or app.config["DEFAULT_SQL_TIMEOUT"],
                        directory=self.temp_path,
                    ).run(query)

                except ValueError as message:
                    raise RunnerException(self.task, self.run_id,
                                          failure_status, message)

                except BaseException as message:
                    raise RunnerException(self.task, self.run_id,
                                          failure_status,
                                          f"Failed to run query.\n{message}")

            data_file = self.source_files[0]
            data_size = file_size(str(Path(data_file.name).stat().st_size))
            RunnerLog(
                self.task,
                self.run_id,
                8,
                f"Query completed.\nData file {data_file.name} created. Data size: {data_size}.",
            )

        elif self.task.source_type_id == 2:  # smb file
            file_name = resolve_file_name(self.task.source_smb_file)

            self.source_files = Smb(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_smb_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif self.task.source_type_id == 3:  # sftp file
            RunnerLog(self.task, self.run_id, 9, "Loading data from server...")
            file_name = resolve_file_name(self.task.source_sftp_file)

            self.source_files = Sftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_sftp_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif self.task.source_type_id == 4:  # ftp file
            RunnerLog(self.task, self.run_id, 13,
                      "Loading data from server...")
            file_name = resolve_file_name(self.task.source_ftp_file)

            self.source_files = Ftp(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ftp_conn,
                directory=self.temp_path,
            ).read(file_name=file_name)

        elif self.task.source_type_id == 6:  # ssh command
            command = self.__get_query()

            Ssh(
                task=self.task,
                run_id=self.run_id,
                connection=self.task.source_ssh_conn,
                command=command,
            ).run()
    def save(self, overwrite: int,
             file_name: str) -> str:  # type: ignore[return]
        """Upload ``file_name`` from the local directory to the share.

        With a connection the file goes under the connection's path. Without
        one it goes to ``<project>/<task>/<last run job id>/`` and the log
        names the server "backup". Missing folders are created one level at a
        time. When ``overwrite`` is not 1 and the file already exists, nothing
        is uploaded.

        Returns:
            The destination path of the file on the share.

        Raises:
            RunnerException: when anything in the upload fails.
        """
        try:
            if self.connection is None:
                backup_dir = (Path(sanitize_filename(self.task.project.name)) /
                              sanitize_filename(self.task.name) /
                              sanitize_filename(self.task.last_run_job_id))
                target = str(Path(backup_dir / file_name))
            else:
                target = str(
                    Path(self.connection.path or "").joinpath(file_name))

            # The path must be created one folder at a time. The docs say
            # the whole path is created if missing, but that does not seem
            # to be the case.
            current_dir = ""
            for folder in target.split("/")[:-1]:
                current_dir += folder + "/"

                try:
                    self.conn.listPath(self.share_name, current_dir)
                except OperationFailure:
                    self.conn.createDirectory(self.share_name, current_dir)

            if overwrite != 1:
                # Try to read the file's security info. If that fails the
                # file is treated as missing and the upload goes ahead.
                # pylint: disable=broad-except
                try:
                    self.conn.getSecurity(self.share_name, target)
                    RunnerLog(
                        self.task,
                        self.run_id,
                        10,
                        "File already exists and will not be loaded",
                    )
                except BaseException:
                    pass
                else:
                    return target

            local_file = str(self.dir.joinpath(file_name))
            with open(local_file, "rb", buffering=0) as file_obj:
                uploaded_size = self.conn.storeFile(self.share_name, target,
                                                    file_obj)

            if self.connection is None:
                server_name = "backup"
            else:
                server_name = self.connection.server_name

            RunnerLog(
                self.task,
                self.run_id,
                10,
                f"{file_size(uploaded_size)} uploaded to {server_name} server.",
            )

            return target

        # pylint: disable=broad-except
        except BaseException as e:
            raise RunnerException(self.task, self.run_id, 10,
                                  f"Failed to save file on server.\n{e}")
    def __pip_install(self) -> None:
        r"""Install the packages imported by the job's scripts.

        Every ``*.py`` file under the job path, except those under the env
        path, is scanned line by line with three patterns:

        get import (...)
        (?<=^import)\s+[^\.][^\s]+?\s+?$

        get import (...) as ...
        (?<=^import)\s+[^\.][^\s]+?(?=\s)

        get from (...) import (...)
        (?<=^from)\s+[^\.].+?(?=import)

        Only the top-level package name is kept. Names of the job's own
        scripts, and modules the environment's python already lists, are
        dropped. Whatever remains is installed with the environment's pip.

        Raises:
            RunnerException: when any step fails.
        """
        try:
            # Applied to each line in this order.
            import_patterns = (
                re.compile(r"(?<=^import)\s+[^\.][^\s]+?\s+?$"),
                re.compile(r"(?<=^from)\s+[^\.].+?(?=import)"),
                re.compile(r"(?<=^import)\s+[^\.][^\s]+?(?=\s)"),
            )

            # find all scripts in dir, but not in venv
            job_scripts = set(Path(self.job_path).rglob("*.py"))
            env_scripts = set(Path(self.env_path).rglob("*.py"))
            paths = list(job_scripts - env_scripts)

            raw_matches = []
            for script_path in paths:
                with open(script_path, "r") as handle:
                    for line in handle:
                        for pattern in import_patterns:
                            raw_matches.extend(pattern.findall(line))

            # Import names that differ from their pip package names.
            package_map = {"dateutil": "python-dateutil", "smb": "pysmb"}

            # Keep only the top-level package of each import.
            top_levels = [
                match.strip().split(".")[0] for match in raw_matches
                if match.strip() != ""
            ]
            imports = [str(package_map.get(top, top)) for top in top_levels]

            # remove any relative imports
            local_modules = {script_path.stem for script_path in paths}
            imports = list(set(imports) - local_modules)

            # remove preinstalled packages from imports
            cmd = f'"{self.env_path}/bin/python" -c "help(\'modules\')"'
            module_listing = Cmd(
                task=self.task,
                run_id=self.run_id,
                cmd=cmd,
                success_msg="Python packages loaded.",
                error_msg=f"Failed to get preloaded packages: \n{cmd}",
            ).shell()

            # The module names sit between these two lines of help output.
            module_listing = module_listing.split(
                "Please wait a moment while I gather a list of all available modules..."
            )[1]
            module_listing = module_listing.split(
                "Enter any module name to get more help.")[0]

            preinstalled = {
                token.strip()
                for row in module_listing.split("\n") if row != ""
                for token in row.split(" ") if token.strip() != ""
            }

            # remove default python packages from list
            imports = [
                name.strip() for name in imports
                if name not in preinstalled and name.strip()
            ]

            if not imports:
                return

            # try to install
            cmd = (
                f'"{self.env_path}/bin/pip" install --disable-pip-version-check --quiet '
                + " ".join(str(name) for name in imports))
            installed_names = ", ".join(str(name) for name in imports)
            Cmd(
                task=self.task,
                run_id=self.run_id,
                cmd=cmd,
                success_msg=("Imports succesfully installed: " +
                             installed_names + " with command: " + "\n" +
                             cmd),
                error_msg="Failed to install imports with command: " + "\n" +
                cmd,
            ).shell()

        except BaseException as e:
            raise RunnerException(
                self.task,
                self.run_id,
                14,
                f"Failed to install packages.\n{self.base_path}\n{e}",
            )