def getFirstToken():
    """Load the configuration and request a fresh token for it.

    Returns:
        The value returned by ``generateToken_pw_flow`` for the loaded
        settings, or ``False`` when ``readJSON`` reports that the
        configuration is not usable.
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        # Nothing sensible can be requested without a valid configuration.
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return False
    return generateToken_pw_flow(configuration_settings)
示例#2
0
def checkStatus(configuration_settings, token, analyzerId):
    """Query the service for the status record of one analyzer.

    Sends ``GET <aca_main_url>/<analyzerId>`` with ``token`` as a Bearer
    credential; certificate checking follows ``ssl_verification``.

    Returns:
        tuple: ``(True, record)`` where ``record`` is the first entry of the
        response's ``"result"`` list; ``(False, response_text)`` when the
        HTTP status is 400 or above; ``(False, {})`` when the body has no
        ``"result"`` key or when any exception is raised along the way.
    """
    auth_headers = {'Authorization': 'Bearer {}'.format(token)}
    base_url = configuration_settings['aca_main_url']
    status_url = base_url + "/{0}".format(analyzerId)
    try:
        response = requests.request(
            "GET",
            status_url,
            headers=auth_headers,
            verify=configuration_settings['ssl_verification'])
        if response.status_code >= 400:
            logger.error("An error occurred while trying to get file status.")
            return False, response.text

        payload = json.loads(response.text)
        if "result" not in payload:
            return False, {}
        return True, payload["result"][0]
    except SSLError as ssl_failure:
        logger.error(
            "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
        )
        logger.debug(ssl_failure, exc_info=True)
    except Exception as failure:
        # Any other failure is echoed to stdout and logged; the caller only
        # sees the generic (False, {}) result below.
        print(failure)
        logger.error("An error occured when trying to get file")
        logger.debug(failure, exc_info=True)
    return False, {}
示例#3
0
def deleteFile(configuration_settings, analyzerId):
    """Ask the server to delete one analyzer.

    Sends ``DELETE <main_url>/<analyzerId>`` with the ``apiKey`` header and,
    when ``function_id`` is present in the settings, an HTTP Basic
    ``Authorization`` header built from ``function_id`` and ``password``.

    Args:
        configuration_settings: Mapping providing ``api_key``, ``main_url``,
            ``ssl_verification`` and optionally ``function_id``/``password``.
        analyzerId: Identifier appended to ``main_url``.

    Returns:
        tuple: ``(True, parsed_json_body)`` on success (``{}`` when the
        server sent no body); ``(False, response_text)`` when the HTTP status
        is 400 or above; ``(False, {})`` when an exception is raised.
    """
    headers = {'apiKey': configuration_settings['api_key']}

    if "function_id" in configuration_settings:
        credentials = '%s:%s' % (configuration_settings["function_id"],
                                 configuration_settings["password"])
        encoded_credentials = b64encode(credentials.encode('ascii'))
        headers['Authorization'] = 'Basic %s' % encoded_credentials.decode(
            "utf-8")

    delete_url = configuration_settings['main_url'] + "/{0}".format(analyzerId)
    try:
        response = requests.request(
            "DELETE",
            delete_url,
            headers=headers,
            verify=configuration_settings['ssl_verification'])
        if response.status_code >= 400:
            logger.error("An error occurred while trying to delete the file.")
            return False, response.text
        if not response.text:
            # A successful DELETE may carry no body; parsing "" would raise
            # and wrongly report the deletion as failed.
            return True, {}
        return True, json.loads(response.text)
    except SSLError as sslerror:
        logger.error(
            "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
        )
        logger.debug(sslerror, exc_info=True)
    except Exception as ex:
        logger.error("An error occurred when trying to delete file")
        logger.debug(ex, exc_info=True)
    return False, {}
def generateToken_pw_flow(config):
    """Obtain a Zen access token using username/password Basic auth.

    Sends ``GET https://<zen_host>/v1/preauth/validateAuth`` with an HTTP
    Basic ``authorization`` header built from ``zen_username`` and
    ``zen_password``. On HTTP 200 the ``accessToken`` field of the response
    content is written to ``output.json`` as ``{"zen_token": <token>}``
    (replacing the file's previous content) and returned.

    Args:
        config: Mapping providing ``zen_host``, ``zen_username``,
            ``zen_password`` and ``ssl_verification``.

    Returns:
        The token on success, or ``None`` when the server answers with any
        other status or an exception is raised (including a failure to write
        ``output.json``).
    """
    zen_token_url = 'https://' + config['zen_host'].strip(
    ) + '/v1/preauth/validateAuth'
    zen_username = config['zen_username'].strip()
    zen_pw = config['zen_password'].strip()
    try:
        encoding = base64.b64encode('{}:{}'.format(zen_username,
                                                   zen_pw).encode())
        headers = {
            "authorization": "Basic " + encoding.decode("utf-8"),
            'Content-Type': 'application/x-www-form-urlencoded'
        }

        response = requests.request("GET",
                                    zen_token_url,
                                    headers=headers,
                                    verify=config['ssl_verification'])

        if response.status_code == 200:
            # Expected content shape: {"accessToken": "xxxxxxxxx"}
            content = getContent(response)
            token = content.get('accessToken')
            logger.info('Successfully got the token')
            # Use a context manager so the handle is flushed and closed even
            # if serialisation fails part-way.
            with open("output.json", 'w') as token_file:
                json.dump({"zen_token": token}, token_file, indent=4)
            return token
        else:
            logger.error("Failed to generate a token")
            content = getContent(response)
            logger.debug(content)
            return None
    except Exception as ex:
        logger.error("Failed to generate a token due to exception")
        logger.debug(str(ex))
        return None
def checkTokenValid(token, config):
    """Validate a Zen token, regenerating it when the server rejects it.

    Sends ``GET https://<zen_host>/usermgmt/v1/user/currentUserInfo`` with
    ``token`` as a Bearer credential.

    Returns:
        ``token`` unchanged on HTTP 200; the result of
        ``generateToken_pw_flow(config)`` on HTTP 401; ``None`` when
        ``token`` or ``config`` is falsy, on any other status, or when an
        exception is raised.
    """
    # Guard clauses: the token is checked before the configuration.
    if not token:
        logger.error("Zen token is required, please provide a valid token")
        return None
    if not config:
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return None

    host = config['zen_host'].strip()
    zen_check_token_url = 'https://' + host + '/usermgmt/v1/user/currentUserInfo'
    try:
        request_headers = {
            "authorization": "Bearer " + token,
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        response = requests.request("GET",
                                    zen_check_token_url,
                                    headers=request_headers,
                                    verify=config['ssl_verification'])

        if response.status_code == 200:
            logger.info('zen token provided is valid')
            return token

        if response.status_code == 401:
            logger.info(
                'zen token provided is invalid or expired, trying to generate a new one'
            )
            return generateToken_pw_flow(config)

        logger.error("Failed to check the token valid or not")
        logger.debug(getContent(response))
        return None
    except Exception as failure:
        logger.error(
            "Failed to check the token valid or not due to exception")
        logger.debug(str(failure))
        return None
示例#6
0
def downloadFile(configuration_settings, analyzerId, output, output_path, filename,bearerToken):
    """Download one output of an analyzer and store it on disk.

    Sends ``GET <main_url>/<analyzerId>/<output>/`` (``output`` lower-cased)
    with the ``apiKey`` header and, when ``function_id`` is configured,
    ``bearerToken`` as the ``Authorization`` header. The file is written
    below ``<output_directory_path>/<output_path>/<output>/``:

    * ``json`` -> ``<filename>.json`` holding the response's ``"data"``
      member (nothing is written when that key is absent);
    * ``pdf``  -> ``New_<filename>.pdf`` holding the raw response bytes;
    * ``utf8`` -> ``<filename>.txt``; any other type -> ``<filename>.<type>``;
      both hold the response text encoded as UTF-8.

    Returns:
        tuple: ``(True, "")`` on success; ``(False, response_text)`` when the
        HTTP status is 400 or above; ``(False, "")`` when an exception is
        raised.
    """
    output_type = output.lower()

    headers = {
        'apiKey': configuration_settings['api_key']
    }
    if "function_id" in configuration_settings:
        headers['Authorization'] = bearerToken

    extra_path = "{0}/{1}/".format(analyzerId, output_type)
    get_url = urlparse.urljoin(configuration_settings['main_url'], extra_path)
    try:
        response = requests.request("GET", get_url, headers=headers, verify=configuration_settings['ssl_verification'])
        if response.status_code >= 400:
            logger.error("An error occurred while trying to download output.")
            return False, response.text

        if output_type == "pdf":
            filename_output = "New_{0}.{1}".format(filename, output_type)
        elif output_type == "utf8":
            filename_output = "{0}.txt".format(filename)
        else:
            filename_output = "{0}.{1}".format(filename, output_type)

        file_output_path = os.path.join(configuration_settings["output_directory_path"], output_path, output_type)
        filename_output_path = os.path.join(file_output_path, filename_output)
        if not os.path.exists(file_output_path):
            os.makedirs(file_output_path)
            logger.info("Created new output directory, "+ file_output_path)

        if output_type == "json":
            response_output = json.loads(response.text)
            if "data" in response_output:
                with open(filename_output_path, "w") as output_write:
                    json.dump(response_output["data"], output_write, indent=4)
        elif output_type == "pdf":
            with open(filename_output_path, "wb") as output_write:
                output_write.write(response.content)
        else:
            # Write explicit UTF-8 bytes instead of relying on the platform
            # default encoding and a bytes-into-text-file fallback.
            with open(filename_output_path, "wb") as output_write:
                output_write.write(response.text.encode('utf-8'))
        return True, ""
    except SSLError as sslerror:
        logger.error("SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file.")
        logger.debug(sslerror, exc_info=True)
    except Exception as ex:
        logger.error("An error occurred when trying to get file")
        logger.debug(ex, exc_info=True)
    return False, ""
示例#7
0
def downloadFile(configuration_settings, token, analyzerId, output,
                 output_path, filename):
    """Download one output of an analyzer and store it on disk.

    Sends ``GET <aca_main_url>/<analyzerId>/<output>`` (``output``
    lower-cased) with ``token`` as a Bearer credential. Files are written
    below ``<output_directory_path>/<output_path>/<output>/``:

    * ``json`` -> ``<filename>.json`` holding ``result[0]["data"]`` of the
      response (nothing is written when that member is absent);
    * ``pdf``  -> ``New_<filename>.pdf`` holding the raw response bytes.

    Returns:
        tuple: ``(True, "")`` when the request succeeded;
        ``(False, response_text)`` when the HTTP status is 400 or above;
        ``(False, "")`` when an exception is raised.
    """
    output_type = output.lower()
    headers = {'Authorization': 'Bearer {}'.format(token)}

    extra_path = "/{0}/{1}".format(analyzerId, output_type)
    get_url = configuration_settings['aca_main_url'] + extra_path
    try:
        response = requests.request(
            "GET",
            get_url,
            headers=headers,
            verify=configuration_settings['ssl_verification'])
        if response.status_code >= 400:
            logger.error("An error occurred while trying to download output.")
            return False, response.text

        if output_type == "pdf":
            filename_output = "New_{0}.{1}".format(filename, output_type)
        elif output_type == "utf8":
            filename_output = "{0}.txt".format(filename)
        else:
            filename_output = "{0}.{1}".format(filename, output_type)

        file_output_path = os.path.join(
            configuration_settings["output_directory_path"], output_path,
            output_type)
        filename_output_path = os.path.join(file_output_path, filename_output)
        if not os.path.exists(file_output_path):
            os.makedirs(file_output_path)
            logger.info("Created new output directory, " + file_output_path)

        if output_type == "json":
            response_output = json.loads(response.text)
            if ("result" in response_output
                    and "data" in response_output["result"][0]):
                # Context manager guarantees the handle is flushed and closed.
                with open(filename_output_path, "w") as output_write:
                    json.dump(response_output["result"][0]["data"],
                              output_write,
                              indent=4)
        elif output_type == "pdf":
            with open(filename_output_path, "wb") as output_write:
                output_write.write(response.content)
        # NOTE(review): other output types (e.g. "utf8", for which a ".txt"
        # name is computed above) are not written to disk yet still report
        # success - confirm whether that is intended.
        return True, ""
    except SSLError as sslerror:
        logger.error(
            "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
        )
        logger.debug(sslerror, exc_info=True)
    except Exception as ex:
        logger.error("An error occurred when trying to get file")
        logger.debug(ex, exc_info=True)
    return False, ""
def deleteFiles(bearerToken):
    """Delete on the server every analyzer recorded in ``output.json``.

    Reads ``output.json`` from the current working directory and, for each
    entry of ``output_results`` whose stored ``response`` JSON contains
    ``data.analyzerId`` and which is not already flagged ``deleted``, calls
    ``deleteFile``. The outcome is stored in the entry's ``deleted`` flag and
    the report is rewritten after every entry so progress survives an
    interruption.

    Args:
        bearerToken: Passed through unchanged to ``deleteFile``.

    Returns:
        None. Problems are reported through the logger.
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        return

    output_json_path = os.path.join(os.getcwd(), "output.json")
    if not os.path.exists(output_json_path):
        logger.error(
            "output.json file does not exist. No results available to delete."
        )
        return

    with open(output_json_path, "r") as report_file:
        output_json = json.load(report_file)

    output_json_result = (output_json["output_results"]
                          if "output_results" in output_json else [])
    if not output_json_result:
        logger.error("No results found that needs deleting.")
        return

    for result in output_json_result:
        try:
            # Parse the stored upload response once instead of three times.
            response = (json.loads(result["response"])
                        if "response" in result else {})
            if "data" in response and "analyzerId" in response["data"]:
                if not result.get("deleted"):
                    analyzerId = response["data"]["analyzerId"]
                    status, result_response = deleteFile(
                        configuration_settings, analyzerId, bearerToken)
                    result["deleted"] = True if status else False
        except Exception as ex:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit
            # still stop the run.
            result["deleted"] = False
            logger.error(
                "No analyzerID available to delete results. The file upload may have failed. File name: {0}"
                .format(result.get("filename")))
            logger.debug(ex, exc_info=True)

        # Persist after each entry so partial progress is not lost.
        output_json["output_results"] = output_json_result
        with open(output_json_path, 'w') as report_file:
            json.dump(output_json, report_file, indent=4)

    logger.info("Done deleting files on the server")
    logger.info("Delete status reported in output.json")
示例#9
0
def checkStatus(configuration_settings, analyzerId,bearerToken):
    """Fetch the status document of one analyzer.

    Sends ``GET <main_url>/<analyzerId>/`` with the ``apiKey`` header and,
    when ``function_id`` is configured, ``bearerToken`` as the
    ``Authorization`` header.

    Returns:
        tuple: ``(True, parsed_json_body)`` on success;
        ``(False, response_text)`` when the HTTP status is 400 or above;
        ``(False, {})`` when an exception is raised.
    """
    logger.info("Config check")
    # The API key is a credential: it is sent in the header but deliberately
    # never written to the log.
    headers = {
        'apiKey': configuration_settings['api_key']
    }
    if "function_id" in configuration_settings:
        headers['Authorization'] = bearerToken

    get_url = urlparse.urljoin(configuration_settings['main_url'], "{0}/".format(analyzerId))
    logger.info("Config check " + get_url)
    try:
        response = requests.request("GET", get_url, headers=headers, verify=configuration_settings['ssl_verification'])
        logger.info(response.status_code)
        if response.status_code >= 400:
            logger.error("An error occurred while trying to get file status.")
            return False, response.text
        return True, json.loads(response.text)
    except SSLError as sslerror:
        logger.error("SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file.")
        logger.debug(sslerror, exc_info=True)
    except Exception as ex:
        logger.error("An error occured when trying to get file")
        logger.debug(ex, exc_info=True)
    return False, {}
def deleteFiles(token):
    """Delete on the server every analyzer recorded in ``output.json``.

    Validates ``token`` with ``checkTokenValid`` first, then reads
    ``output.json`` from the current working directory. For each entry of
    ``output_results`` whose stored ``response`` JSON contains
    ``result[0].data.analyzerId`` and which is not already flagged
    ``deleted``, ``deleteFile`` is called and the outcome is stored in the
    entry's ``deleted`` flag. The report is rewritten after every entry.

    The token is regenerated with ``generateToken_pw_flow`` once it is older
    than ``7000 * 5`` seconds (or when a previous regeneration failed), so a
    long run keeps deleting instead of silently skipping the remaining
    entries.

    Args:
        token: Zen token to start with.

    Returns:
        None. Problems are reported through the logger.
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        return

    token = checkTokenValid(token, configuration_settings)
    if not token:
        logger.error("Zen token is required to delete files")
        return

    token_refresh_seconds = 7000 * 5  # just under 10 hours
    token_obtained_at = dt.datetime.now()

    output_json_path = os.path.join(os.getcwd(), "output.json")
    if not os.path.exists(output_json_path):
        logger.error(
            "output.json file does not exist. No results available to delete."
        )
        return

    with open(output_json_path, "r") as report_file:
        output_json = json.load(report_file)

    output_json_result = (output_json["output_results"]
                          if "output_results" in output_json else [])
    if not output_json_result:
        logger.error("No results found that needs deleting.")
        return

    for result in output_json_result:
        try:
            # Parse the stored upload response once instead of four times.
            response = (json.loads(result["response"])
                        if "response" in result else {})
            if ("result" in response and "data" in response["result"][0]
                    and "analyzerId" in response["result"][0]["data"]):
                if not result.get("deleted"):
                    analyzerId = response["result"][0]["data"]["analyzerId"]

                    token_age = (dt.datetime.now() -
                                 token_obtained_at).total_seconds()
                    if not token or token_age >= token_refresh_seconds:
                        token = generateToken_pw_flow(configuration_settings)
                        token_obtained_at = dt.datetime.now()

                    if token:
                        status, result_response = deleteFile(
                            configuration_settings, token, analyzerId)
                        result["deleted"] = True if status else False
                    else:
                        result["deleted"] = False
                        logger.error("Zen token is required to delete files")
        except Exception as ex:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit
            # still stop the run.
            result["deleted"] = False
            logger.error(
                "No analyzerID available to delete results. The file upload may have failed. File name: {0}"
                .format(result.get("filename")))
            logger.debug(ex, exc_info=True)

        # Persist after each entry so partial progress is not lost.
        output_json["output_results"] = output_json_result
        with open(output_json_path, 'w') as report_file:
            json.dump(output_json, report_file, indent=4)

    logger.info("Done deleting files on the server")
    logger.info("Delete status reported in output.json")
def executeRules():
    """Post every downloaded JSON result to the ODM decision service.

    Walks ``output_directory_path``; for each file whose second dot-separated
    name component is ``json`` the file content is wrapped in an ODM request
    payload, posted to ``odm_url`` with HTTP Basic auth
    (``odmUser``/``odmPassword``) and the JSON answer is stored as
    ``<odm_output_directory_path>/<first name component>ODM.json``.

    Returns:
        ``True`` after the walk finishes, ``False`` as soon as an SSL error
        occurs, ``None`` when the configuration could not be loaded. Other
        request/parsing errors are logged and the walk continues.
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        return None

    dir_path = configuration_settings["output_directory_path"]
    for subdir, _dirs, files in os.walk(dir_path):
        for entry in files:
            file_split = entry.rsplit(".")
            # Names without any dot have a single component; skip them
            # instead of crashing on file_split[1].
            if len(file_split) < 2 or file_split[1] != "json":
                continue

            odm_file_value = str(file_split[0]) + "ODM.json"
            file_extension = str(file_split[-1].strip())

            old_file_name = entry.replace("." + file_extension, '').strip()
            file_name = re.sub(
                '[^A-Za-z0-9 _]+', ' ',
                old_file_name).strip() + "." + str(file_extension)
            new_file_path = os.path.join(subdir, file_name)

            # Read and close the input before the network call so the handle
            # cannot leak on an early return.
            with open(new_file_path, "r") as f:
                y = json.loads(f.read())

            odmData = {
                '__DecisionID__': "myworld",
                "document_in": y,
                "metaData_inout": {
                    "dynamicParams": "string"
                }
            }

            # Textual patches applied to the serialised payload before it is
            # sent (boolean Sensitivity -> 0, empty Language -> []).
            odmJson = json.dumps(odmData)
            odmJson = odmJson.replace('"Sensitivity": false',
                                      '"Sensitivity":0')
            odmJson = odmJson.replace('Language": ""', 'Language":[]')

            authPayload = HTTPBasicAuth(
                configuration_settings["odmUser"],
                configuration_settings["odmPassword"])
            ODM_ProcessBacaDocument_ServiceURL = configuration_settings[
                "odm_url"]
            headers = {'content-type': 'application/json'}
            try:
                # NOTE(review): certificate verification is hard-coded off
                # here and ignores the ssl_verification setting - confirm
                # whether that is intended.
                response = requests.request(
                    "POST",
                    ODM_ProcessBacaDocument_ServiceURL,
                    headers=headers,
                    data=odmJson,
                    auth=authPayload,
                    verify=False)
                logger.debug(response.text)
                jsonPayloadODM = json.loads(response.text)
                fileOutPutPath = configuration_settings[
                    "odm_output_directory_path"] + "/" + odm_file_value
                with open(fileOutPutPath, 'w') as outfile:
                    json.dump(jsonPayloadODM, outfile)
            except SSLError as sslerror:
                logger.error(
                    "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
                )
                logger.debug(sslerror, exc_info=True)
                return False
            except Exception as ex:
                # Best effort: report the failure and continue with the next
                # file.
                logger.error(
                    "An error occurred when trying to execute rules for file "
                    + new_file_path)
                logger.debug(ex, exc_info=True)
    return True
示例#12
0
def downloadFiles():
    """Poll the server and download every finished output listed in output.json.

    The report ``output.json`` (current working directory) is re-read on each
    pass of a polling loop that runs at most 1000 times with a 5 second pause
    between passes. For every entry of ``output_results`` that carries an
    ``analyzerId`` and has no ``download_success`` flag yet, the analyzer
    status is fetched with ``checkStatus``; each requested output type whose
    status is ``"Completed"`` is fetched with ``downloadFile``, and each one
    whose status is ``"Failed"`` is recorded as failed. The report is
    rewritten after every entry.

    Returns:
        ``True`` when the loop ended before 999 passes or when the report
        holds no results; ``False`` when the pass limit was reached or
        ``output.json`` does not exist; ``None`` (implicitly) when the
        configuration could not be loaded.
    """
    pending_completion = True
    configuration, configuration_settings = readJSON()
    if (configuration):
        loop = 0
        # NOTE(review): these two lists are created once and never cleared
        # between polling passes, so an entry that takes the same branch on
        # several passes is counted several times - confirm this is intended.
        failed_download = []
        completed_download = []
        completed_count = 0
        output_json_path = os.path.join(os.getcwd(), "output.json")
        if (os.path.exists(output_json_path)):
            while pending_completion and loop < 1000:
                # Re-read the report on every pass; it is rewritten below
                # after each processed entry.
                output_json = json.load(open(output_json_path, "r"))
                loop += 1
                logger.info("Loop " + str(loop))
                if ("output_results" in output_json
                        and len(output_json["output_results"]) > 0):
                    output_json_result = output_json["output_results"]
                    new_output_json_result = []
                    for outresult in output_json_result:
                        result = outresult
                        try:
                            if "response" in result and "data" in json.loads(
                                    result["response"]
                            ) and "analyzerId" in json.loads(
                                    result["response"])["data"]:
                                # Entries already flagged with
                                # download_success are left untouched.
                                if "download_success" not in result:
                                    response = json.loads(result["response"])
                                    path = result["path"]
                                    filename = result["filename"]
                                    analyzerId = response["data"]["analyzerId"]

                                    # Requested output types, quotes stripped
                                    # and upper-cased.
                                    output_outputs = result["output_type"]
                                    latest_output_outputs = [
                                        output.replace("\"", "").upper()
                                        for output in output_outputs
                                    ]
                                    if ("download_completed" not in result):
                                        completed = checkCompleted(
                                            latest_output_outputs, result)
                                        if (completed):
                                            result["download_completed"] = True
                                        else:
                                            logger.info(
                                                "Checking status of analyzerId: "
                                                + analyzerId)
                                            status, result_response = checkStatus(
                                                configuration_settings,
                                                analyzerId)
                                            if (status):
                                                if ("data" in result_response
                                                        and "statusDetails" in
                                                        result_response["data"]
                                                    ):
                                                    status_result_response = result_response[
                                                        "data"][
                                                            "statusDetails"]

                                                    # Output types that are
                                                    # finished (downloaded or
                                                    # failed) for this entry.
                                                    done_output = []
                                                    for output in status_result_response:
                                                        if (output["type"] in
                                                                latest_output_outputs
                                                                and output[
                                                                    "status"]
                                                                == "Completed"
                                                                and
                                                                output["type"]
                                                                not in result):
                                                            logger.info(
                                                                "Downloading {0} of analyzerId: {1}"
                                                                .format(
                                                                    output[
                                                                        "type"],
                                                                    analyzerId
                                                                ))
                                                            response, reason = downloadFile(
                                                                configuration_settings,
                                                                analyzerId,
                                                                output["type"],
                                                                path,
                                                                filename.
                                                                rsplit(".")[0])

                                                            # The download
                                                            # outcome is stored
                                                            # under the output
                                                            # type's own key.
                                                            result[output[
                                                                "type"]] = response
                                                            if (not response):
                                                                result[
                                                                    output[
                                                                        "type"]
                                                                    +
                                                                    "_error"] = reason
                                                            done_output.append(
                                                                output["type"])
                                                        elif (output["type"] in
                                                              latest_output_outputs
                                                              and
                                                              output["status"]
                                                              == "Failed" and
                                                              output["type"]
                                                              not in result):

                                                            result[output[
                                                                "type"]] = False
                                                            result[
                                                                output["type"]
                                                                +
                                                                "_error"] = output[
                                                                    "status"]
                                                            done_output.append(
                                                                output["type"])
                                                        elif output[
                                                                "type"] in result:
                                                            done_output.append(
                                                                output["type"])
                                                    if (len(
                                                            done_output
                                                    ) == len(
                                                            latest_output_outputs
                                                    )):
                                                        completed_download.append(
                                                            True)

                                                    completed = checkCompleted(
                                                        latest_output_outputs,
                                                        result)
                                                    if (completed):
                                                        result[
                                                            "download_completed"] = True

                                            else:
                                                result[
                                                    "download_success"] = False
                                                result[
                                                    "download_failure_reason"] = result_response
                                                failed_download.append(True)
                            else:
                                logger.error(
                                    "We could not find any information to download files from."
                                )
                                result["download_success"] = False
                                result[
                                    "download_failure_reason"] = "No available data to download from"
                                failed_download.append(True)
                        except:
                            result["download_success"] = False
                            result[
                                "download_failure_reason"] = "No analyzerID available to download results"
                            logger.error(
                                "No analyzerID available to download results. The file upload may have failed. File name: {0}"
                                .format(result["filename"]))
                            failed_download.append(True)

                        # Persist the report after every entry.
                        endtime = dt.datetime.now()
                        output_json["output_results"] = output_json_result
                        output_json["endtime"] = str(endtime)
                        json.dump(output_json,
                                  open("output.json", 'w'),
                                  indent=4)
                        completed_count = len(failed_download) + len(
                            completed_download)

                    # Stop polling once the counters cover every entry;
                    # otherwise wait before the next pass.
                    if (completed_count >= len(output_json_result)):
                        pending_completion = False
                    else:
                        if (loop >= 1000):
                            pending_completion = False
                            logger.error(
                                "Reached maximum number of download retries.")
                        else:
                            time.sleep(5)
                else:
                    pending_completion = False
                    logger.error("No results available to download.")
                    return True

            else:
                # while/else: runs once the loop condition becomes false.
                logger.info(
                    "Done downloading all output files to your output_directory_path"
                )
                logger.info("Download status reported in output.json")
                if (loop < 999):
                    return True
                else:
                    return False
        else:
            logger.error(
                "output.json file does not exist. No results available to download."
            )

            return False
                return None
        else:
            logger.error(
                "Check your configuration file (config.json) for correct format and valid parameters"
            )
            return None
    else:
        logger.error("Zen token is required, please provide a valid token")
        return None


def getFirstToken():
    """Generate a token from the settings returned by ``readJSON()``.

    Returns:
        Whatever ``generateToken_pw_flow`` returns for the loaded settings,
        or ``False`` when ``readJSON()`` reports that the configuration is
        not usable.
    """
    config_ok, settings = readJSON()
    if not config_ok:
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return False
    return generateToken_pw_flow(settings)


if __name__ == '__main__':
    # Script entry point: tell the user where the logs go, then request a
    # first token. Any failure is reported at error level, with the full
    # traceback only emitted at debug level.
    logger.info("Logs can be found in current directory (processing.log)")
    token = None
    try:
        getFirstToken()
    except Exception as error:
        logger.error("An error occurred, please check logs for more details")
        logger.debug(error, exc_info=True)
def _result_is_unfinished(result, expected_outputs):
    """Tell whether one ``output_results`` entry still lacks an expected output.

    Args:
        result: One entry of ``output_json["output_results"]`` (a dict).
        expected_outputs: Upper-cased output type names expected for the entry.

    Returns:
        True when the entry is flagged ``download_success == False``, when no
        output type is expected at all, or when any expected output type is
        either absent from the entry or recorded as ``False``.
    """
    # ``== False`` (not ``is False``) is kept on purpose so that the values
    # that compared equal to False before still do.
    if "download_success" in result and result["download_success"] == False:  # noqa: E712
        return True
    if not expected_outputs:
        return True
    return any(
        output not in result or result[output] == False  # noqa: E712
        for output in expected_outputs)


def updateReport():
    """Refresh ``output.json`` in the current directory with a completion summary.

    Reads ``output.json``, collects every file that is listed under
    ``upload_errors`` / ``errors`` or whose ``output_results`` entry is still
    missing an expected output, and writes back ``endtime``,
    ``total_seconds``, ``unfinished_files`` and ``no_of_unfinished_files``.

    Logs an error and leaves the file untouched when ``output.json`` does not
    exist or has no ``output_results`` key. Always returns ``None``.

    Raises:
        KeyError: If the report has no ``starttime`` key, or an entry lacks
            ``filename`` / ``output_type``.
    """
    output_json_path = os.path.join(os.getcwd(), "output.json")
    if not os.path.exists(output_json_path):
        logger.error(
            "output.json file does not exist. No results available for reporting."
        )
        return

    # Context manager so the handle is closed before the file is rewritten.
    with open(output_json_path, "r") as report_file:
        output_json = json.load(report_file)

    # Files that already failed during upload or processing are unfinished.
    unfinished_files = []
    unfinished_files_with_path = []
    for section in ("upload_errors", "errors"):
        if section not in output_json:
            continue
        section_entries = output_json[section]
        unfinished_files.extend(entry["filename"] for entry in section_entries)
        unfinished_files_with_path.extend({
            "filename": entry["filename"],
            "path": entry["path"]
        } for entry in section_entries)

    endtime = dt.datetime.now()
    starttime = parser.parse(output_json["starttime"])

    if "output_results" not in output_json:
        logger.error("No results available for reporting")
        return

    output_json_result = output_json["output_results"]
    for result in output_json_result:
        expected_outputs = [
            output.replace("\"", "").upper()
            for output in result["output_type"]
        ]
        unfinished_object = {
            "filename": result["filename"],
            "path": result["path"] if "path" in result else "",
            "full_path": result["full_path"] if "full_path" in result else ""
        }
        try:
            unfinished = _result_is_unfinished(result, expected_outputs)
        except Exception as ex:
            # Best effort: a malformed entry must not abort the whole report.
            # The "deleted" marker is what this code path has always written.
            logger.debug(ex, exc_info=True)
            result["deleted"] = False
            continue
        # De-duplicate by filename across upload errors and download results.
        if unfinished and result["filename"] not in unfinished_files:
            unfinished_files.append(result["filename"])
            unfinished_files_with_path.append(unfinished_object)

    seconds = (endtime - starttime).total_seconds()
    output_json["endtime"] = str(endtime)
    output_json["total_seconds"] = str(seconds)
    output_json["unfinished_files"] = unfinished_files_with_path
    output_json["no_of_unfinished_files"] = len(unfinished_files)
    output_json["output_results"] = output_json_result

    with open(output_json_path, "w") as report_file:
        json.dump(output_json, report_file, indent=4)
    logger.info(
        "Done updating report in the current directory, output.json")
示例#15
0
def uploadFiles():
    """Upload every accepted file under ``directory_path`` using the API key.

    Walks ``configuration_settings["directory_path"]``, POSTs each file whose
    extension is in both ``accepted_extensions`` and the effective file-type
    filter to ``main_url``, and rewrites ``output.json`` in the current
    directory after each visited file so progress survives an interruption.

    Returns:
        ``False`` when the configuration is not usable or no matching file
        was found; ``True`` otherwise, including when some uploads failed
        (failures are listed under ``upload_errors`` in ``output.json``).
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return False

    starttime = dt.datetime.now()
    dir_path = configuration_settings["directory_path"]
    count = 0
    errors = []
    output_results = []
    upload_url = configuration_settings['main_url']
    accepted_extensions = configuration_settings["accepted_extensions"]

    # Use the optional 'file_type' filter only when it is a non-empty list.
    # The previous check, type(x is list), was always truthy, so a string
    # value was iterated character by character and matched nothing.
    file_types = accepted_extensions
    if 'file_type' in configuration_settings:
        configured_types = configuration_settings['file_type']
        if isinstance(configured_types, list) and len(configured_types) > 0:
            file_types = configured_types
    file_types = [f_type.lower() for f_type in file_types]

    for subdir, _dirs, filenames in os.walk(dir_path):
        for file in filenames:
            file_path = os.path.join(subdir, file)

            file_split = file.rsplit(".")
            file_extension = str(file_split[-1].strip())
            old_file_name = file.replace("." + file_extension, '').strip()
            # Name sent to the service: anything outside [A-Za-z0-9 _] in the
            # stem is collapsed to a space.
            file_name = re.sub(
                '[^A-Za-z0-9 _]+', ' ',
                old_file_name).strip() + "." + str(file_extension)
            new_file_path = os.path.join(subdir, file_name)
            if (file_extension != ""
                    and file_extension.lower() in accepted_extensions
                    and file_extension.lower() in file_types):
                count += 1
                try:
                    logger.info("Uploading {0} ".format(new_file_path))
                except Exception:
                    # Logging the name is best effort; never block the upload.
                    pass

                headers = {'apiKey': configuration_settings['api_key']}
                if "function_id" in configuration_settings:
                    credentials = ('%s:%s' %
                                   (configuration_settings["function_id"],
                                    configuration_settings["password"]))
                    encoded_credentials = b64encode(
                        credentials.encode('ascii'))
                    headers[
                        'Authorization'] = 'Basic %s' % encoded_credentials.decode(
                            "utf-8")

                dict_object = {
                    "filename":
                    file_name,
                    "path":
                    os.path.basename(subdir),
                    "full_path":
                    os.path.join(os.path.abspath(subdir),
                                 old_file_name + "." + str(file_extension))
                }

                # Make request. The file is opened inside the try block so an
                # unreadable file is recorded as an upload error instead of
                # aborting the batch, and the handle is always closed.
                try:
                    with open(file_path, 'rb') as upload_handle:
                        response = requests.request(
                            "POST",
                            upload_url,
                            files={
                                'file': (file_name, upload_handle,
                                         "multipart/form-data")
                            },
                            data={
                                'jsonOptions':
                                configuration_settings['json_options'],
                                'responseType':
                                configuration_settings['output_options']
                            },
                            headers=headers,
                            verify=configuration_settings['ssl_verification'])
                    if response.status_code >= 400:
                        logger.error(
                            "HTTP error {0} occurred when uploading file: {1} "
                            .format(str(response.status_code), file_path))
                        dict_object.update({"error": response.text})
                        errors.append(dict_object)
                    else:
                        dict_object.update({
                            "response":
                            response.text,
                            "output_type":
                            configuration_settings['output_options'].split(
                                ",")
                        })
                        output_results.append(dict_object)

                except SSLError as sslerror:
                    logger.error(
                        "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
                    )
                    dict_object.update({"error": str(sslerror)})
                    errors.append(dict_object)
                except Exception as ex:
                    dict_object.update({"error": str(ex)})
                    errors.append(dict_object)
                    logger.error(
                        "An error occurred when trying to upload file " +
                        file_path)
                    logger.debug(ex, exc_info=True)

            # Checkpoint the report after every visited file.
            endtime = dt.datetime.now()
            seconds = (endtime - starttime).total_seconds()
            result = {
                "starttime": str(starttime),
                "endtime": str(endtime),
                "no_of_files": count,
                "output_results": output_results,
                "no_output_results": len(output_results),
                "total_upload_seconds": seconds,
                "upload_errors": errors,
                "no_of_upload_errors": len(errors)
            }
            with open(os.path.join(os.getcwd(), "output.json"),
                      'w') as report_file:
                json.dump(result, report_file, indent=4)

    logger.info("Upload status reported in output.json")
    if count == 0:
        logger.error("No files found in directory_path, " + dir_path)
        return False
    if len(errors) > 0:
        logger.error("Errors occurred during upload.")
        return True
    logger.info("Done uploading {0} files".format(count))
    return True
def uploadFiles(token):
    """Upload every accepted file under ``directory_path`` using a bearer token.

    Validates ``token`` through ``checkTokenValid``, walks
    ``configuration_settings["directory_path"]``, POSTs each file whose
    extension is in both ``accepted_extensions`` and the effective file-type
    filter to ``aca_main_url``, and rewrites ``output.json`` in the current
    directory after each visited file. The token is regenerated with
    ``generateToken_pw_flow`` once it is older than 7000 * 5 seconds, and the
    current file is then uploaded with the fresh token.

    Args:
        token: Token to validate and send as ``Authorization: Bearer``.

    Returns:
        ``False`` when the configuration is not usable, the token is rejected
        by ``checkTokenValid``, or no matching file was found; ``True``
        otherwise, including when some uploads failed (failures are listed
        under ``upload_errors`` in ``output.json``).
    """
    configuration, configuration_settings = readJSON()
    if not configuration:
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return False

    token = checkTokenValid(token, configuration_settings)
    if not token:
        logger.error("Zen token is required to upload the files")
        return False

    starttime = dt.datetime.now()
    # Age of the current token is tracked separately from the batch start so
    # that a refresh resets the clock instead of re-triggering on every file.
    token_issued_at = starttime
    token_lifetime_seconds = 7000 * 5  # about 9.7 hours
    dir_path = configuration_settings["directory_path"]
    count = 0
    errors = []
    output_results = []
    upload_url = configuration_settings['aca_main_url']
    logger.info("ACA main url is: {0} ".format(upload_url))
    accepted_extensions = configuration_settings["accepted_extensions"]

    # Use the optional 'file_type' filter only when it is a non-empty list.
    # The previous check, type(x is list), was always truthy, so a string
    # value was iterated character by character and matched nothing.
    file_types = accepted_extensions
    if 'file_type' in configuration_settings:
        configured_types = configuration_settings['file_type']
        if isinstance(configured_types, list) and len(configured_types) > 0:
            file_types = configured_types
    file_types = [f_type.lower() for f_type in file_types]

    for subdir, _dirs, filenames in os.walk(dir_path):
        for file in filenames:
            file_path = os.path.join(subdir, file)

            file_split = file.rsplit(".")
            file_extension = str(file_split[-1].strip())
            old_file_name = file.replace("." + file_extension, '').strip()
            # Name sent to the service: anything outside [A-Za-z0-9 _] in the
            # stem is collapsed to a space.
            file_name = re.sub(
                '[^A-Za-z0-9 _]+', ' ',
                old_file_name).strip() + "." + str(file_extension)
            new_file_path = os.path.join(subdir, file_name)
            if (file_extension != ""
                    and file_extension.lower() in accepted_extensions
                    and file_extension.lower() in file_types):
                count += 1
                try:
                    logger.info("Uploading {0} ".format(new_file_path))
                except Exception:
                    # Logging the name is best effort; never block the upload.
                    pass

                dict_object = {
                    "filename":
                    file_name,
                    "path":
                    os.path.basename(subdir),
                    "full_path":
                    os.path.join(os.path.abspath(subdir),
                                 old_file_name + "." + str(file_extension))
                }

                # Make request
                try:
                    current_time = dt.datetime.now()
                    token_age = (current_time -
                                 token_issued_at).total_seconds()
                    if token_age >= token_lifetime_seconds:
                        # NOTE(review): unpacked as a 2-tuple as before, while
                        # getFirstToken uses the return value as-is; confirm
                        # the return shape of generateToken_pw_flow.
                        token, _checked_time = generateToken_pw_flow(
                            configuration_settings)
                        if token:
                            token_issued_at = current_time

                    if not token:
                        message = "Zen token is required to upload the files, filename {}".format(
                            file_name)
                        logger.error(message)
                        dict_object.update({"error": message})
                        errors.append(dict_object)
                    else:
                        headers = {'Authorization': 'Bearer {}'.format(token)}
                        # The handle is closed as soon as the request returns.
                        with open(file_path, 'rb') as upload_handle:
                            response = requests.request(
                                "POST",
                                upload_url,
                                files={
                                    'file': (file_name, upload_handle,
                                             "multipart/form-data")
                                },
                                data={
                                    'jsonOptions':
                                    configuration_settings['json_options'],
                                    'responseType':
                                    configuration_settings['output_options']
                                },
                                headers=headers,
                                verify=configuration_settings[
                                    'ssl_verification'])
                        if response.status_code >= 400:
                            logger.error(
                                "HTTP error {0} occurred when uploading file: {1} "
                                .format(str(response.status_code), file_path))
                            # A non-JSON error body must not hide the HTTP
                            # error behind a JSON decoding failure.
                            try:
                                error = response.text if response.status_code == 500 else json.loads(
                                    response.text)
                            except ValueError:
                                error = response.text
                            logger.error("Error details: {}".format(error))
                            dict_object.update({"error": response.text})
                            errors.append(dict_object)
                        else:
                            dict_object.update({
                                "response":
                                response.text,
                                "output_type":
                                configuration_settings['output_options'].split(
                                    ",")
                            })
                            output_results.append(dict_object)

                except SSLError as sslerror:
                    logger.error(
                        "SSL error was thrown due to certificate failure, set ssl_verification to false in configuration config.json file."
                    )
                    dict_object.update({"error": str(sslerror)})
                    errors.append(dict_object)
                except Exception as ex:
                    dict_object.update({"error": str(ex)})
                    errors.append(dict_object)
                    logger.error(
                        "An error occurred when trying to upload file " +
                        file_path)
                    logger.debug(ex, exc_info=True)

            # Checkpoint the report after every visited file.
            endtime = dt.datetime.now()
            seconds = (endtime - starttime).total_seconds()
            result = {
                "zen_token": token,
                "starttime": str(starttime),
                "endtime": str(endtime),
                "no_of_files": count,
                "output_results": output_results,
                "no_output_results": len(output_results),
                "total_upload_seconds": seconds,
                "upload_errors": errors,
                "no_of_upload_errors": len(errors)
            }
            with open(os.path.join(os.getcwd(), "output.json"),
                      'w') as report_file:
                json.dump(result, report_file, indent=4)

    logger.info("Upload status reported in output.json")
    if count == 0:
        logger.error("No files found in directory_path, " + dir_path)
        return False
    if len(errors) > 0:
        logger.error("Errors occurred during upload.")
        return True
    logger.info("Done uploading {0} files".format(count))
    return True
示例#17
0
def downloadFiles(token):
    pending_completion = True
    configuration, configuration_settings = readJSON()
    if (configuration):
        token = checkTokenValid(token, configuration_settings)
        starttime = dt.datetime.now()
        if token:
            loop = 0
            failed_download = []
            completed_download = []
            compelted_check = []
            completed_count = 0
            output_json_path = os.path.join(os.getcwd(), "output.json")
            if (os.path.exists(output_json_path)):
                while pending_completion and loop < 1000:
                    output_json = json.load(open(output_json_path, "r"))
                    loop += 1
                    logger.info("Loop " + str(loop))
                    # print(output_json)
                    if ("output_results" in output_json
                            and len(output_json["output_results"]) > 0):
                        output_json_result = output_json["output_results"]
                        new_output_json_result = []
                        for outresult in output_json_result:
                            result = outresult
                            try:
                                if "response" in result and "result" in json.loads(
                                        result["response"]
                                ) and "data" in json.loads(
                                        result["response"]
                                )["result"][0] and "analyzerId" in json.loads(
                                        result["response"]
                                )["result"][0]["data"]:
                                    if "download_success" not in result:
                                        response = json.loads(
                                            result["response"])
                                        path = result["path"]
                                        filename = result["filename"]
                                        analyzerId = response["result"][0][
                                            "data"]["analyzerId"]

                                        output_outputs = result["output_type"]
                                        latest_output_outputs = [
                                            output.replace("\"", "").upper()
                                            for output in output_outputs
                                        ]

                                        if ("download_completed"
                                                not in result):
                                            completed = checkCompleted(
                                                latest_output_outputs, result)
                                            if (completed):
                                                result[
                                                    "download_completed"] = True

                                            else:
                                                current_time = dt.datetime.now(
                                                )
                                                seconds = (
                                                    current_time -
                                                    starttime).total_seconds()
                                                if seconds < 7000 * 5:  # refresh zen token every 10 hours (7199 = 2 hours)
                                                    if token:
                                                        headers = {
                                                            'Authorization':
                                                            'Bearer {}'.format(
                                                                token)
                                                        }
                                                        logger.info(
                                                            "Checking status of analyzerId: {}, filename: {}"
                                                            .format(
                                                                analyzerId,
                                                                filename))
                                                        status, result_response = checkStatus(
                                                            configuration_settings,
                                                            token, analyzerId)

                                                        if (status):
                                                            if ("data" in
                                                                    result_response
                                                                    and
                                                                    "statusDetails"
                                                                    in
                                                                    result_response[
                                                                        "data"]
                                                                ):
                                                                status_result_response = result_response[
                                                                    "data"][
                                                                        "statusDetails"]

                                                                done_output = []
                                                                for output in status_result_response:
                                                                    # print(output)
                                                                    logger.info(
                                                                        "status: {}, type {}, analyzerId: {}, filename: {}"
                                                                        .
                                                                        format(
                                                                            output
                                                                            .
                                                                            get(
                                                                                "status"
                                                                            ),
                                                                            output
                                                                            .
                                                                            get(
                                                                                "type"
                                                                            ),
                                                                            analyzerId,
                                                                            filename
                                                                        ))
                                                                    if (output[
                                                                            "type"]
                                                                            in
                                                                            latest_output_outputs
                                                                            and
                                                                            output[
                                                                                "status"]
                                                                            ==
                                                                            "Completed"
                                                                            and
                                                                            output[
                                                                                "type"]
                                                                            not in
                                                                            result
                                                                        ):
                                                                        logger.info(
                                                                            "Downloading {0} of analyzerId: {1}"
                                                                            .
                                                                            format(
                                                                                output[
                                                                                    "type"],
                                                                                analyzerId
                                                                            ))
                                                                        response, reason = downloadFile(
                                                                            configuration_settings,
                                                                            token,
                                                                            analyzerId,
                                                                            output[
                                                                                "type"],
                                                                            path,
                                                                            filename
                                                                            .
                                                                            rsplit(
                                                                                "."
                                                                            )
                                                                            [0]
                                                                        )

                                                                        result[output[
                                                                            "type"]] = response
                                                                        if (not response
                                                                            ):
                                                                            result[
                                                                                output[
                                                                                    "type"]
                                                                                +
                                                                                "_error"] = reason
                                                                        done_output.append(
                                                                            output[
                                                                                "type"]
                                                                        )
                                                                    elif (output[
                                                                            "type"]
                                                                          in
                                                                          latest_output_outputs
                                                                          and
                                                                          output[
                                                                              "status"]
                                                                          ==
                                                                          "Failed"
                                                                          and
                                                                          output[
                                                                              "type"]
                                                                          not in
                                                                          result
                                                                          ):

                                                                        result[output[
                                                                            "type"]] = False
                                                                        result[
                                                                            output[
                                                                                "type"]
                                                                            +
                                                                            "_error"] = output[
                                                                                "status"]
                                                                        logger.info(
                                                                            "Processing failed of analyzerId: {}, filename: {}"
                                                                            .
                                                                            format(
                                                                                analyzerId,
                                                                                filename
                                                                            ))
                                                                        done_output.append(
                                                                            output[
                                                                                "type"]
                                                                        )
                                                                    elif output[
                                                                            "type"] in result:
                                                                        logger.info(
                                                                            "Processing of analyzerId: {}, filename: {}"
                                                                            .
                                                                            format(
                                                                                analyzerId,
                                                                                filename
                                                                            ))
                                                                        done_output.append(
                                                                            output[
                                                                                "type"]
                                                                        )
                                                                if (len(
                                                                        done_output
                                                                ) == len(
                                                                        latest_output_outputs
                                                                )):
                                                                    completed_download.append(
                                                                        True)
                                                                    compelted_check.append(
                                                                        filename
                                                                    )

                                                                completed = checkCompleted(
                                                                    latest_output_outputs,
                                                                    result)
                                                                if (completed):
                                                                    result[
                                                                        "download_completed"] = True

                                                        else:
                                                            logger.error(
                                                                "Cannot get status of analyzerId: {}, filename: {}"
                                                                .format(
                                                                    analyzerId,
                                                                    filename))
                                                            result[
                                                                "download_success"] = False
                                                            result[
                                                                "download_failure_reason"] = result_response
                                                            failed_download.append(
                                                                True)
                                                    else:
                                                        message = "Zen token is required to check file status, filename {}".format(
                                                            filename)
                                                        logger.error(message)
                                                        failed_download.append(
                                                            True)
                                                else:
                                                    token, checked_time = generateToken_pw_flow(
                                                        configuration_settings)

                                else:
                                    logger.error(
                                        "We could not find any information to download files from."
                                    )
                                    result["download_success"] = False
                                    result[
                                        "download_failure_reason"] = "No available data to download from"
                                    failed_download.append(True)
                            except Exception as err:
                                result["download_success"] = False
                                result[
                                    "download_failure_reason"] = "No analyzerID available to download results"
                                logger.error(
                                    "No analyzerID available to download results. The file upload may have failed. File name: {0}"
                                    .format(result["filename"]))
                                failed_download.append(True)
                                logger.error(err)

                            # new_output_json_result.append(result)
                            # print(len(new_output_json_result))
                            endtime = dt.datetime.now()
                            output_json["output_results"] = output_json_result
                            output_json["endtime"] = str(endtime)
                            json.dump(output_json,
                                      open("output.json", 'w'),
                                      indent=4)
                            completed_count = len(failed_download) + len(
                                completed_download)

                        logger.info("count, failed {}, completed {}".format(
                            len(failed_download), len(completed_download)))
                        if (completed_count >= len(output_json_result)):
                            print(compelted_check)
                            pending_completion = False
                        else:
                            if (loop >= 1000):
                                pending_completion = False
                                logger.error(
                                    "Reached maximum number of download retries."
                                )
                            else:
                                time.sleep(10)
                    else:
                        pending_completion = False
                        logger.error("No results available to download.")
                        return True

                logger.info(
                    "Done downloading all output files to your output_directory_path"
                )
                logger.info("Download status reported in output.json")
                if (loop < 999):
                    return True
                else:
                    return False
            else:
                logger.error(
                    "output.json file does not exist. No results available to download."
                )

                return False
        else:
            logger.error("Zen token is required to download the files")
            return False
    else:
        logger.error(
            "Check your configuration file (config.json) for correct format and valid parameters"
        )
        return False
示例#18
0
    # Driver sequence: obtain a token, upload, download, delete server-side
    # copies, update the report, then log the elapsed wall-clock time.
    # NOTE(review): the enclosing header of this indented body is not visible
    # in this part of the file — confirm which scope it belongs to.
    token = getFirstToken()
    if token:
        # NOTE(review): token_time is assigned here but not read again in the
        # visible lines — confirm whether anything else depends on it.
        token_time = dt.datetime.now()
        logger.info("Successfully generate the token, continue...")
        logger.info("Uploading files")
        uploadSuccess = uploadFiles(token)

        # Download, delete and report steps run only when uploadFiles
        # returned a truthy value.
        if (uploadSuccess):
            logger.info("Ready to download output files...")
            complete = downloadFiles(token)

            # Delete the server-side files only when downloadFiles returned a
            # truthy value; otherwise keep them and point the user at
            # deleteFiles.py for a later run.
            if (complete):
                logger.info("Deleting files on the server")
                deleteFiles(token)
            else:
                logger.info(
                    "Could not delete at this time because download has not been completed yet, please run deleteFiles.py at a later time"
                )

            # The report is updated whether or not the delete step ran.
            logger.info("Updating report")
            updateReport()

        # Elapsed time is logged for every run that obtained a token, even if
        # the upload failed.
        # NOTE(review): starttime is not defined in the visible lines —
        # presumably set before this point; verify.
        endtime = dt.datetime.now()
        seconds = (endtime - starttime).total_seconds()
        logger.info(
            "ADP API Sample tool ended. Processing time took {0} seconds, Disclaimer: This includes time to upload, download and delete and has nothing to do with actual processing time."
            .format(int(seconds)))
    else:
        # A falsy token stops the run: nothing is uploaded, downloaded or
        # deleted.
        logger.error("Failed to generate token to continue")