Example #1
import os

import requests
from bs4 import BeautifulSoup


def list_pdf_v2(configs,
                save_dir,
                name_in_url=True,
                extract_name=False,
                add_date=False,
                try_overwite=False,
                no_overwrite=False,
                debug=False):
    # Create the download directory if it does not exist yet.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Fetch the listing page and parse it.
    html_page = requests.get(configs.webpage).text
    soup = BeautifulSoup(html_page, "html.parser")

    # Start clean: extract_info() appends to url_name.txt.
    try:
        os.remove("url_name.txt")
    except FileNotFoundError:
        pass

    extract_info(soup, configs, extract_name=extract_name)
    get_files(save_dir,
              configs.sleep_time,
              name_in_url=name_in_url,
              add_date=add_date)
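A minimal driver sketch for list_pdf_v2, under the assumptions the code above implies: the webpage and sleep_time attribute names mirror how the function reads configs, but the SimpleNamespace stand-in and the URL are placeholders, and extract_info/get_files must come from the same module.

from types import SimpleNamespace

# Placeholder configuration; any object with these attributes would do.
configs = SimpleNamespace(
    webpage="https://example.com/reports",  # page that links the PDFs
    sleep_time=2,                           # seconds to wait between downloads
)
list_pdf_v2(configs, save_dir="pdfs", extract_name=True, add_date=True)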
Example #2
import os

import gnupg

# utils, settings, logger, _get_decryption and __set_file_name are
# project-level helpers defined elsewhere in the same package.


def decrypt_file(gpg: gnupg.GPG, targer_folder: str):
    configuration_kwargs = utils.get_configuratation(
        path=os.path.join(targer_folder, 'configuration'))
    gpg = _get_decryption(gpg=gpg, configuration_kwargs=configuration_kwargs)

    encrypted_filepaths = utils.get_files(
        path=os.path.join(targer_folder, 'encrypted_files'))

    decrypted_path = os.path.join(targer_folder, 'decrypted_files')
    # Outside production, fall back to a fixed test passphrase.
    password = (configuration_kwargs.get('array')['passphrase']
                if settings.ENVIRONMENT == 'prod' else 'wow')

    for encryped_file in encrypted_filepaths:
        base_name = __set_file_name(path=encryped_file)

        logger.info(f'Decrypting file {base_name}')

        output_file = os.path.join(decrypted_path, base_name)

        with open(encryped_file, 'rb') as connection:
            status = gpg.decrypt_file(file=connection,
                                      passphrase=password,
                                      output=output_file)
            logger.info('Decrypting done')

        if not status.ok:
            logger.error(status.stderr)
        else:
            logger.info(f'Status [{status.status}]')
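For orientation, a stand-alone sketch of the python-gnupg call the loop relies on; the keyring directory, file names, and passphrase here are placeholders, not values from the project configuration.

import gnupg

gpg = gnupg.GPG(gnupghome="/tmp/gnupg_home")  # throwaway keyring for testing
with open("report.pdf.gpg", "rb") as handle:
    status = gpg.decrypt_file(handle, passphrase="wow", output="report.pdf")
print(status.ok, status.status)  # status.stderr holds details on failure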
Example #3
# Relies on the same module-level imports and helpers as decrypt_file above.


def encrypt_file(gpg: gnupg.GPG, targer_folder: str):
    configuration_kwargs = utils.get_configuratation(
        path=os.path.join(targer_folder, 'configuration'))

    # The configuration carries the key material to encrypt against.
    value = _get_decryption(configuration_kwargs=configuration_kwargs)

    # Import the key and trust it so encryption does not prompt.
    gpg.import_keys(value)
    keys = gpg.list_keys()
    gpg.trust_keys(keys.fingerprints, 'TRUST_ULTIMATE')

    decrypted_filepaths = utils.get_files(
        path=os.path.join(targer_folder, 'decrypted_files'))

    encrypted_path = os.path.join(targer_folder, 'encrypted_files')

    for decryped_file in decrypted_filepaths:
        base_name = __set_file_name(path=decryped_file)

        logger.info(f'Encrypting file {base_name}')

        output_file = os.path.join(encrypted_path, base_name)

        with open(decryped_file, 'rb') as connection:
            status = gpg.encrypt_file(file=connection,
                                      recipients=keys.fingerprints,
                                      output=output_file)
            logger.info('Encrypting done')

        if not status.ok:
            logger.error(status.stderr)
        else:
            logger.info(f'Status [{status.status}]')
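And the matching stand-alone sketch for the encryption side: import a public key, trust it so gpg does not prompt, then encrypt against its fingerprints. The key file and document names are placeholders.

import gnupg

gpg = gnupg.GPG(gnupghome="/tmp/gnupg_home")
with open("public_key.asc") as handle:  # placeholder: an exported public key
    import_result = gpg.import_keys(handle.read())
gpg.trust_keys(import_result.fingerprints, "TRUST_ULTIMATE")

with open("report.pdf", "rb") as handle:
    status = gpg.encrypt_file(handle,
                              recipients=import_result.fingerprints,
                              output="report.pdf.gpg")
print(status.ok, status.status)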
Example #4
            # Excerpt: `link`, `url`, `domain` and `soup` are defined by the
            # enclosing loop. Fall back to scanning <span class="hyperlink">
            # elements for a human-readable document name.
            try:
                name_table = []
                for link_2 in soup.findAll("span"):
                    if "hyperlink" in str(link_2.get("class")):
                        name_table.append(link_2.string)
                name = name_table[0]
            except (KeyError, IndexError):
                # An empty name_table raises IndexError rather than KeyError.
                print("No document name found")
        # Append one "url, filename.pdf" pair per line for get_files().
        with open("url_name.txt", "a") as output:
            if "https" in link["href"]:
                output.write(url + ", " + name.strip("/") + ".pdf" + "\n")
            else:
                # Prepend the domain when href holds only a relative path.
                output.write(domain + url + ", " + name.strip("/") + ".pdf" +
                             "\n")
    print("Done")


try:
    os.remove("url_name.txt")
except FileNotFoundError:
    pass
extract_info(soup)
get_files(save_dir, sleep_time)

# import etl.py
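The writes above produce one "url, filename.pdf" pair per line in url_name.txt; a small sketch of reading those pairs back, with the field layout inferred from the code above:

with open("url_name.txt") as pairs:
    for line in pairs:
        # Split from the right so commas inside the URL cannot break the pair.
        url, file_name = line.rstrip("\n").rsplit(", ", 1)
        print(f"{file_name} <- {url}")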
Example #5
# Relies on the same module-level imports as list_pdf_v2 above
# (os, requests, BeautifulSoup) and the helpers extract_info/get_files.
def list_pdf_v3(
    configs,
    save_dir,
    debug=False,
    delete=True,
    important=False,
    try_overwite=False,
    name_in_url=True,
    add_date=False,
    extract_name=False,
    no_overwrite=False,
):  # try_overwite is for get_files

    if not os.path.exists(save_dir):
        print(" [*] Making save_dir")
        os.makedirs(save_dir)
    print(" [*] Getting webpage and parsing")
    html_page = requests.get(configs.webpage).text
    soup = BeautifulSoup(html_page, "html.parser")
    # delete=True wipes any url_name.txt left over from a previous run.
    if delete:
        try:
            os.remove("url_name.txt")
        except FileNotFoundError:
            pass
    print(" [*] Extracting info.")
    extract_info(soup, configs, extract_name=extract_name, name_in_url=name_in_url)

    if not important:
        print(" [?] important is False, using non_important")
        non_important = configs.non_important
        print("   [*] Opening url_name.txt")
        with open("url_name.txt", "r") as og_file, open(
            "2url_name.txt", "w"
        ) as new_file:
            print("   [*] Adding only important lines to 2url_name.txt")
            # Keep a line only if none of the non_important markers occur in it.
            for line in og_file:
                if not any(marker in line.lower() for marker in non_important):
                    new_file.write(line)
            print(" [*] Done writing")
    else:
        print(" [?] important is True, assuming important is configured")
        try:
            important = configs.important
        except AttributeError:
            print("   [!] Important is still named `non_important`")
            important = configs.non_important
        print(" [*] Opening url_name.txt")
        with open("url_name.txt", "r") as og_file, open(
            "2url_name.txt", "w"
        ) as new_file:
            print("   [*] Adding lines containing: " + str(important))
            # Keep a line only if at least one important marker occurs in it.
            for line in og_file:
                if any(marker in line.lower() for marker in important):
                    new_file.write(line)
                    print(line)
            print(" [*] Done writing")
    # Outside debug mode, replace url_name.txt with the filtered copy.
    if not debug:
        try:
            os.remove("url_name.txt")
        except FileNotFoundError:
            pass
        os.rename("2url_name.txt", "url_name.txt")

    get_files(
        save_dir,
        configs.sleep_time,
        debug=debug,
        delete=delete,
        try_overwite=try_overwite,
        name_in_url=name_in_url,
        no_overwrite=no_overwrite,
        add_date=add_date,
    )
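A minimal driver sketch for list_pdf_v3 under the same assumptions as before: the attribute names mirror what the function reads, the values are placeholders, and extract_info/get_files come from the surrounding module.

from types import SimpleNamespace

configs = SimpleNamespace(
    webpage="https://example.com/minutes",
    sleep_time=1,
    non_important=["draft", "agenda"],  # lowercase markers that exclude a line
)
# debug=True keeps both url_name.txt and the filtered 2url_name.txt around.
list_pdf_v3(configs, save_dir="pdfs", debug=True)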