Example #1
def start_datascraper():
    parser = ArgumentParser()
    parser.add_argument("-m",
                        "--metadata",
                        action='store_true',
                        help="only exports metadata")
    args = parser.parse_args()
    if args.metadata:
        print("Exporting Metadata Only")
    log_error = main_helper.setup_logger('errors', 'errors.log')
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)
    # root = os.getcwd()
    config_path = os.path.join('.settings', 'config.json')
    json_config, json_config2 = main_helper.get_config(config_path)
    json_settings = json_config["settings"]
    json_sites = json_config["supported"]
    infinite_loop = json_settings["infinite_loop"]
    global_user_agent = json_settings['global_user_agent']
    domain = json_settings["auto_site_choice"]
    path = os.path.join('.settings', 'extra_auth.json')
    extra_auth_config = json.load(open(path))
    exit_on_completion = json_settings['exit_on_completion']
    loop_timeout = json_settings['loop_timeout']

    string = "Site: "
    site_names = []
    bl = ["patreon"]
    if not domain:
        site_names = [x for x in json_sites if x not in bl]
        site_count = len(site_names)
        count = 0
        for x in site_names:
            string += str(count) + " = " + x
            if count + 1 != site_count:
                string += " | "
            count += 1
        string += " | x = Exit"

    try:
        while True:
            if domain:
                site_name = domain
            else:
                print(string)
                x = input()
                if x == "x":
                    break
                x = int(x)
                site_name = site_names[x]
            site_name_lower = site_name.lower()

            json_auth_array = [json_sites[site_name_lower]["auth"]]

            json_site_settings = json_sites[site_name_lower]["settings"]
            auto_scrape_names = json_site_settings["auto_scrape_names"]
            extra_auth_settings = json_sites[site_name_lower][
                "extra_auth_settings"] if "extra_auth_settings" in json_sites[
                    site_name_lower] else {
                        "extra_auth": False
                    }
            extra_auth = extra_auth_settings["extra_auth"]
            if extra_auth:
                choose_auth = extra_auth_settings["choose_auth"]
                merge_auth = extra_auth_settings["merge_auth"]
                json_auth_array += extra_auth_config[site_name_lower][
                    "extra_auth"]
                if choose_auth:
                    json_auth_array = main_helper.choose_auth(json_auth_array)
            session_array = []
            x = onlyfans
            app_token = ""
            subscription_array = []
            legacy = True
            if site_name_lower == "onlyfans":
                legacy = False
                site_name = "OnlyFans"
                subscription_array = []
                auth_count = -1
                x.assign_vars(json_config, json_site_settings, site_name)
                for json_auth in json_auth_array:
                    auth_count += 1
                    app_token = json_auth['app_token']
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']

                    x = onlyfans
                    session = x.create_session()
                    if not session:
                        print("Unable to create session")
                        continue
                    session = x.create_auth(session, user_agent, app_token,
                                            json_auth)
                    session_array.append(session)
                    if not session["session"]:
                        continue
                    # x.get_paid_posts(session["session"],app_token)
                    cookies = session["session"].cookies.get_dict()
                    auth_id = cookies["auth_id"]
                    json_auth['auth_id'] = auth_id
                    json_auth['auth_uniq_'] = cookies["auth_uniq_" + auth_id]
                    json_auth['auth_hash'] = cookies["auth_hash"]
                    json_auth['sess'] = cookies["sess"]
                    json_auth['fp'] = cookies["fp"]
                    if json_config != json_config2:
                        update_config(json_config)
                    me_api = session["me_api"]
                    array = x.get_subscriptions(session["session"], app_token,
                                                session["subscriber_count"],
                                                me_api, auth_count)
                    subscription_array += array
                subscription_array = x.format_options(subscription_array,
                                                      "usernames")
            elif site_name_lower == "patreon":
                legacy = False
                site_name = "Patreon"
                subscription_array = []
                auth_count = -1
                x = patreon
                x.assign_vars(json_config, json_site_settings, site_name)
                for json_auth in json_auth_array:
                    auth_count += 1
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']

                    session = x.create_session()
                    session = x.create_auth(session, user_agent, json_auth)
                    session_array.append(session)
                    if not session["session"]:
                        continue
                    cookies = session["session"].cookies.get_dict()
                    json_auth['session_id'] = cookies["session_id"]
                    if json_config != json_config2:
                        update_config(json_config)
                    me_api = session["me_api"]
                    array = x.get_subscriptions(session["session"], auth_count)
                    subscription_array += array
                subscription_array = x.format_options(subscription_array,
                                                      "usernames")
            elif site_name_lower == "starsavn":
                legacy = False
                site_name = "StarsAVN"
                subscription_array = []
                auth_count = -1
                x = starsavn
                x.assign_vars(json_config, json_site_settings, site_name)
                for json_auth in json_auth_array:
                    auth_count += 1
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']
                    session = x.create_session()
                    session = x.create_auth(session, user_agent, app_token,
                                            json_auth)
                    session_array.append(session)
                    if not session["session"]:
                        continue
                    me_api = session["me_api"]
                    array = x.get_subscriptions(session["session"], app_token,
                                                session["subscriber_count"],
                                                me_api, auth_count)
                    subscription_array += array
                subscription_array = x.format_options(subscription_array,
                                                      "usernames")
            elif site_name_lower == "fourchan":
                x = fourchan
                site_name = "4Chan"
                x.assign_vars(json_config, json_site_settings, site_name)
                session_array = [x.create_session()]
                array = x.get_subscriptions()
                subscription_array = x.format_options(array)
            elif site_name_lower == "bbwchan":
                x = bbwchan
                site_name = "BBWChan"
                x.assign_vars(json_config, json_site_settings, site_name)
                session_array = [x.create_session()]
                array = x.get_subscriptions()
                subscription_array = x.format_options(array)
            names = subscription_array[0]
            if names:
                print("Names: Username = username | " + subscription_array[1])
                if not auto_scrape_names:
                    value = input().strip()
                    if value.isdigit():
                        if value == "0":
                            names = names[1:]
                        else:
                            names = [names[int(value)]]
                    else:
                        names = [name for name in names if value in name[1]]
                else:
                    value = 0
                    names = names[1:]
            else:
                print("There's nothing to scrape.")
                continue
            start_time = timeit.default_timer()
            download_list = []
            for name in names:
                # Extra Auth Support
                if not legacy:
                    json_auth = json_auth_array[name[0]]
                    auth_count = name[0]
                    session = session_array[auth_count]["session"]
                    name = name[-1]
                else:
                    session = session_array[0]["session"]
                main_helper.assign_vars(json_config)
                username = main_helper.parse_links(site_name_lower, name)
                result = x.start_datascraper(session,
                                             username,
                                             site_name,
                                             app_token,
                                             choice_type=value)
                if not args.metadata:
                    download_list.append(result)
            for y in download_list:
                for arg in y[1]:
                    x.download_media(*arg)
            stop_time = str(int(timeit.default_timer() - start_time) / 60)
            print('Task Completed in ' + stop_time + ' Minutes')
            if exit_on_completion:
                print("Now exiting.")
                exit(0)
            elif not infinite_loop:
                print("Input anything to continue")
                input()
            elif loop_timeout:
                print('Pausing scraper for ' + loop_timeout + ' seconds.')
                time.sleep(int(loop_timeout))
    except Exception as e:
        log_error.exception(e)
        input()
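
Examples #1 and #5 load their settings through main_helper.get_config, while Example #4 inlines the same logic. A sketch of the helper under that reading; make_config.start and update_config are the names Example #4 uses, assumed here to be available in scope:

import copy
import json
import os


def get_config(config_path):
    # Sketch based on the inline logic in Example #4; make_config and
    # update_config are assumed to be importable.
    if os.path.isfile(config_path):
        json_config = json.load(open(config_path))
    else:
        json_config = {}
    # Regenerate the config with defaults filled in, persisting any drift.
    json_config2 = json.loads(
        json.dumps(make_config.start(**json_config),
                   default=lambda o: o.__dict__))
    if json_config != json_config2:
        update_config(json_config2)
    json_config = copy.deepcopy(json_config2)
    return json_config, json_config2
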
Example #2
from datetime import datetime
from itertools import chain, groupby, product
from multiprocessing.dummy import Pool as ThreadPool
from urllib.parse import urlparse

import requests
from requests.adapters import HTTPAdapter

import extras.OFSorter.ofsorter as ofsorter
from helpers.main_helper import (check_for_dupe_file, clean_text, create_sign,
                                 export_archive, format_directory,
                                 format_image, format_media_set, get_directory,
                                 json_request, log_error, reformat,
                                 setup_logger)

log_download = setup_logger('downloads', 'downloads.log')

json_config = None
multithreading = None
json_settings = None
auto_choice = None
j_directory = None
format_path = None
overwrite_files = None
proxy = None
cert = None
date_format = None
ignored_keywords = None
ignore_type = None
export_metadata = None
delete_legacy_metadata = None
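
These module-level names are placeholders that get filled in at runtime. No assign_vars body appears in these listings, but the calls in Example #1 and the settings keys read in Example #7 suggest something like the following sketch; the exact signature and the ignore_type key are assumptions:

def assign_vars(config, site_settings, site_name):
    # Hypothetical: copy global and per-site settings into the module globals.
    global json_config, multithreading, json_settings, auto_choice, \
        j_directory, format_path, overwrite_files, date_format, \
        ignored_keywords, ignore_type, export_metadata
    json_config = config
    json_settings = site_settings
    multithreading = config["settings"]["multithreading"]
    auto_choice = site_settings["auto_choice"]
    j_directory = get_directory(site_settings["directory"])
    format_path = site_settings["file_name_format"]
    overwrite_files = site_settings["overwrite_files"]
    date_format = site_settings["date_format"]
    ignored_keywords = site_settings["ignored_keywords"]
    ignore_type = site_settings.get("ignore_type")  # key name is a guess
    export_metadata = site_settings["export_metadata"]
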
Example #3
import copy
import json
from types import SimpleNamespace
from urllib.parse import urlparse

import jsonpickle
import requests
from deepdiff import DeepHash

import classes.prepare_download as prepare_download
import helpers.main_helper as main_helper
from helpers.main_helper import export_archive, import_archive

multiprocessing = main_helper.multiprocessing
log_download = main_helper.setup_logger('downloads', 'downloads.log')

json_config = None
json_global_settings = None
max_threads = -1
json_settings = None
auto_choice = None
j_directory = ""
metadata_directory_format = ""
file_directory_format = None
file_name_format = None
overwrite_files = None
date_format = None
ignored_keywords = None
ignore_type = None
export_metadata = None
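
Every example builds its loggers through setup_logger(name, file). A conventional implementation, sketched here as an assumption, mirrors the console handler set up in Example #1:

import logging


def setup_logger(name, log_file, level=logging.INFO):
    # Sketch: one named logger writing to its own file, reusing the format
    # string from Example #1's console handler.
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s')
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)
    return logger
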
Example #4
def start_datascraper():
    parser = ArgumentParser()
    parser.add_argument("-m",
                        "--metadata",
                        action='store_true',
                        help="only exports metadata")
    args = parser.parse_args()
    if args.metadata:
        print("Exporting Metadata Only")
    log_error = main_helper.setup_logger('errors', 'errors.log')
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)
    # Open config.json and fill in MANDATORY information for the script to work
    path = os.path.join('.settings', 'config.json')
    if os.path.isfile(path):
        json_config = json.load(open(path))
    else:
        json_config = {}
    json_config2 = json.loads(
        json.dumps(make_config.start(**json_config),
                   default=lambda o: o.__dict__))
    if json_config != json_config2:
        update_config(json_config2)
    if not json_config:
        input(
            "The .settings\\config.json file has been created. Fill in the "
            "required details, then press enter to continue.\n"
        )
        json_config2 = json.load(open(path))
    json_config = copy.deepcopy(json_config2)
    json_settings = json_config["settings"]
    json_sites = json_config["supported"]
    infinite_loop = json_settings["infinite_loop"]
    global_user_agent = json_settings['global_user_agent']
    domain = json_settings["auto_site_choice"]
    path = os.path.join('.settings', 'extra_auth.json')
    extra_auth_config = json.load(open(path))
    exit_on_completion = json_settings['exit_on_completion']
    loop_timeout = json_settings['loop_timeout']

    string = ""
    site_names = []
    if not domain:
        site_count = len(json_sites)
        count = 0
        for x in json_sites:
            string += str(count) + " = " + x
            site_names.append(x)
            if count + 1 != site_count:
                string += " | "

            count += 1
    try:
        while True:
            if domain:
                site_name = domain
            else:
                print("Site: " + string)
                x = int(input())
                site_name = site_names[x]
            site_name_lower = site_name.lower()
            json_auth_array = [json_sites[site_name_lower]["auth"]]

            json_site_settings = json_sites[site_name_lower]["settings"]
            auto_scrape_names = json_site_settings["auto_scrape_names"]
            extra_auth_settings = json_sites[site_name_lower][
                "extra_auth_settings"] if "extra_auth_settings" in json_sites[
                    site_name_lower] else {
                        "extra_auth": False
                    }
            extra_auth = extra_auth_settings["extra_auth"]
            if extra_auth:
                choose_auth = extra_auth_settings["choose_auth"]
                merge_auth = extra_auth_settings["merge_auth"]
                json_auth_array += extra_auth_config[site_name_lower][
                    "extra_auth"]
                if choose_auth:
                    json_auth_array = main_helper.choose_auth(json_auth_array)
            session_array = []
            x = onlyfans
            app_token = ""
            subscription_array = []
            legacy = True
            if site_name_lower == "onlyfans":
                legacy = False
                site_name = "OnlyFans"
                subscription_array = []
                auth_count = -1
                x.assign_vars(json_config, json_site_settings)
                for json_auth in json_auth_array:
                    auth_count += 1
                    app_token = json_auth['app_token']
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']

                    x = onlyfans
                    session = x.create_session()
                    session = x.create_auth(session, user_agent, app_token,
                                            json_auth)
                    session_array.append(session)
                    if not session["session"]:
                        continue
                    cookies = session["session"].cookies.get_dict()
                    json_auth['auth_id'] = cookies["auth_id"]
                    json_auth['auth_hash'] = cookies["auth_hash"]
                    json_auth['sess'] = cookies["sess"]
                    json_auth['fp'] = cookies["fp"]
                    if json_config != json_config2:
                        update_config(json_config)
                    me_api = session["me_api"]
                    array = x.get_subscriptions(session["session"], app_token,
                                                session["subscriber_count"],
                                                me_api, auth_count)
                    subscription_array += array
                subscription_array = x.format_options(subscription_array,
                                                      "usernames")
            elif site_name_lower == "starsavn":
                legacy = False
                site_name = "StarsAVN"
                subscription_array = []
                auth_count = -1
                x = starsavn
                x.assign_vars(json_config, json_site_settings)
                for json_auth in json_auth_array:
                    auth_count += 1
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']
                    sess = json_auth['sess']

                    auth_array = dict()
                    auth_array["sess"] = sess
                    session = x.create_session(user_agent, app_token,
                                               auth_array)
                    session_array.append(session)
                    if not session["session"]:
                        continue
                    me_api = session["me_api"]
                    array = x.get_subscriptions(session["session"], app_token,
                                                session["subscriber_count"],
                                                me_api, auth_count)
                    subscription_array += array
                subscription_array = x.format_options(subscription_array,
                                                      "usernames")
            elif site_name_lower == "fourchan":
                x = fourchan
                site_name = "4Chan"
                x.assign_vars(json_config, json_site_settings)
                session_array = [x.create_session()]
                array = x.get_subscriptions()
                subscription_array = x.format_options(array)
            elif site_name_lower == "bbwchan":
                x = bbwchan
                site_name = "BBWChan"
                x.assign_vars(json_config, json_site_settings)
                session_array = [x.create_session()]
                array = x.get_subscriptions()
                subscription_array = x.format_options(array)
            names = subscription_array[0]
            if names:
                print("Names: " + subscription_array[1])
                if not auto_scrape_names:
                    value = int(input().strip())
                else:
                    value = 0
                if value:
                    names = [names[value]]
                else:
                    names.pop(0)
            else:
                print("There's nothing to scrape.")
                continue
            start_time = timeit.default_timer()
            download_list = []
            for name in names:
                # Extra Auth Support
                if not legacy:
                    json_auth = json_auth_array[name[0]]
                    auth_count = name[0]
                    session = session_array[auth_count]["session"]
                    name = name[1]
                else:
                    session = session_array[0]["session"]
                main_helper.assign_vars(json_config)
                username = main_helper.parse_links(site_name_lower, name)
                result = x.start_datascraper(session,
                                             username,
                                             site_name,
                                             app_token,
                                             choice_type=value)
                if not args.metadata:
                    download_list.append(result)
            for y in download_list:
                for arg in y[1]:
                    x.download_media(*arg)
            stop_time = str(int(timeit.default_timer() - start_time) / 60)
            print('Task Completed in ' + stop_time + ' Minutes')
            if exit_on_completion:
                print("Now exiting.")
                exit(0)
            elif not infinite_loop:
                print("Input anything to continue")
                input()
            elif loop_timeout:
                print('Pausing scraper for ' + loop_timeout + ' seconds.')
                time.sleep(int(loop_timeout))
    except Exception as e:
        log_error.exception(e)
        input()
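
Example #4 calls update_config whenever the in-memory config drifts from what was loaded, but the helper itself is not shown. A minimal sketch; the default path and the indentation are assumptions:

import json
import os


def update_config(json_config,
                  config_path=os.path.join('.settings', 'config.json')):
    # Sketch: write the merged config back to disk.
    with open(config_path, 'w') as f:
        json.dump(json_config, f, indent=2)
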
Example #5
def start_datascraper():
    parser = ArgumentParser()
    parser.add_argument("-m",
                        "--metadata",
                        action='store_true',
                        help="only exports metadata")
    args = parser.parse_args()
    if args.metadata:
        print("Exporting Metadata Only")
    log_error = main_helper.setup_logger('errors', 'errors.log')
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)
    # root = os.getcwd()
    config_path = os.path.join('.settings', 'config.json')
    json_config, json_config2 = main_helper.get_config(config_path)
    json_settings = json_config["settings"]
    json_sites = json_config["supported"]
    infinite_loop = json_settings["infinite_loop"]
    global_user_agent = json_settings['global_user_agent']
    domain = json_settings["auto_site_choice"]
    path = os.path.join('.settings', 'extra_auth.json')
    # extra_auth_config, extra_auth_config2 = main_helper.get_config(path)
    extra_auth_config = {}
    exit_on_completion = json_settings['exit_on_completion']
    loop_timeout = json_settings['loop_timeout']
    main_helper.assign_vars(json_config)

    string, site_names = module_chooser(domain, json_sites)
    try:
        while True:
            if domain:
                if site_names:
                    site_name = domain
                else:
                    print(string)
                    continue
            else:
                print(string)
                x = input()
                if x == "x":
                    break
                x = int(x)
                site_name = site_names[x]
            site_name_lower = site_name.lower()

            json_auth_array = [json_sites[site_name_lower]["auth"]]

            json_site_settings = json_sites[site_name_lower]["settings"]
            auto_scrape_names = json_site_settings["auto_scrape_names"]
            extra_auth_settings = json_sites[site_name_lower][
                "extra_auth_settings"] if "extra_auth_settings" in json_sites[
                    site_name_lower] else {
                        "extra_auth": False
                    }
            extra_auth = extra_auth_settings["extra_auth"]
            if extra_auth:
                choose_auth = extra_auth_settings["choose_auth"]
                merge_auth = extra_auth_settings["merge_auth"]
                json_auth_array += extra_auth_config["supported"][
                    site_name_lower]["auths"]
                if choose_auth:
                    json_auth_array = main_helper.choose_auth(json_auth_array)
            apis = []
            module = m_onlyfans
            subscription_array = []
            legacy = True
            original_sessions = api_helper.create_session(
                settings=json_settings)
            if not original_sessions:
                print("Unable to create session")
                continue
            archive_time = timeit.default_timer()
            if site_name_lower == "onlyfans":
                site_name = "OnlyFans"
                subscription_array = []
                auth_count = -1
                jobs = json_site_settings["jobs"]
                for json_auth in json_auth_array:
                    api = OnlyFans.start(original_sessions)
                    auth_count += 1
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']

                    module = m_onlyfans
                    module.assign_vars(json_auth, json_config,
                                       json_site_settings, site_name)
                    api.set_auth_details(**json_auth,
                                         global_user_agent=user_agent)
                    identifier = ""
                    setup = module.account_setup(api, identifier=identifier)
                    if not setup:
                        continue
                    if jobs["scrape_names"]:
                        array = module.manage_subscriptions(
                            api, auth_count, identifier=identifier)
                        subscription_array += array
                    apis.append(api)
                subscription_list = module.format_options(
                    subscription_array, "usernames")
                if jobs["scrape_paid_content"]:
                    print("Scraping Paid Content")
                    paid_content = module.paid_content_scraper(apis)
                if jobs["scrape_names"]:
                    print("Scraping Subscriptions")
                    x = main_helper.process_names(module, subscription_list,
                                                  auto_scrape_names,
                                                  json_auth_array, apis,
                                                  json_config, site_name_lower,
                                                  site_name)
                x = main_helper.process_downloads(apis, module)
            elif site_name_lower == "starsavn":
                site_name = "StarsAVN"
                subscription_array = []
                auth_count = -1
                for json_auth in json_auth_array:
                    sessions = api_helper.copy_sessions(original_sessions)
                    api = StarsAVN.start(sessions)
                    auth_count += 1
                    user_agent = global_user_agent if not json_auth[
                        'user_agent'] else json_auth['user_agent']

                    module = m_starsavn
                    module.assign_vars(json_auth, json_config,
                                       json_site_settings, site_name)
                    api.set_auth_details(**json_auth,
                                         global_user_agent=user_agent)
                    setup = module.account_setup(api)
                    if not setup:
                        continue
                    jobs = json_site_settings["jobs"]
                    if jobs["scrape_names"]:
                        array = module.manage_subscriptions(api, auth_count)
                        subscription_array += array
                    if jobs["scrape_paid_content"]:
                        paid_contents = api.get_paid_content()
                        paid_content = module.paid_content_scraper(api)
                    apis.append(api)
                subscription_array = module.format_options(
                    subscription_array, "usernames")
            stop_time = str(int(timeit.default_timer() - archive_time) /
                            60)[:4]
            print('Archive Completed in ' + stop_time + ' Minutes')
            if exit_on_completion:
                print("Now exiting.")
                exit(0)
            elif not infinite_loop:
                print("Input anything to continue")
                input()
            elif loop_timeout:
                print('Pausing scraper for ' + loop_timeout + ' seconds.')
                time.sleep(int(loop_timeout))
    except Exception as e:
        log_error.exception(e)
        input()
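
Examples #5 and #6 delegate menu building to module_chooser, which presumably factors out the inline loop from Example #1. A sketch under that assumption:

def module_chooser(domain, json_sites):
    # Sketch mirroring the inline menu loop in Example #1.
    string = "Site: "
    site_names = []
    bl = ["patreon"]  # sites hidden from the interactive menu
    wl = [x for x in json_sites if x not in bl]
    if domain and domain not in wl:
        # Example #5 prints the returned string and loops when site_names
        # comes back empty, so report a bad auto_site_choice here.
        return "Invalid site name: " + domain, site_names
    site_names = wl
    for count, x in enumerate(site_names):
        string += str(count) + " = " + x + " | "
    string += "x = Exit"
    return string, site_names
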
Example #6
#!/usr/bin/env python3
import tests.main_test as main_test
import os
import logging
import time

main_test.version_check()
main_test.check_config()
main_test.check_profiles()

if __name__ == "__main__":
    import datascraper.main_datascraper as main_datascraper
    import helpers.main_helper as main_helper
    log_error = main_helper.setup_logger('errors', 'errors.log')
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s %(name)s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger("").addHandler(console)
    config_path = os.path.join('.settings', 'config.json')
    json_config, json_config2 = main_helper.get_config(config_path)
    json_settings = json_config["settings"]
    exit_on_completion = json_settings['exit_on_completion']
    infinite_loop = json_settings["infinite_loop"]
    loop_timeout = json_settings['loop_timeout']
    json_sites = json_config["supported"]
    domain = json_settings["auto_site_choice"]
    string, site_names = main_helper.module_chooser(domain, json_sites)
    while True:
        try:
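
The listing breaks off inside the loop. Judging by the loop tail shared by Examples #1, #4, and #5, the missing body plausibly runs one scrape pass and then honours the exit and loop settings; the exact start_datascraper call is an assumption:

    while True:
        try:
            # Assumed dispatch into the scraper module imported above.
            main_datascraper.start_datascraper()
            if exit_on_completion:
                print("Now exiting.")
                break
            elif not infinite_loop:
                print("Input anything to continue")
                input()
            elif loop_timeout:
                print('Pausing scraper for ' + loop_timeout + ' seconds.')
                time.sleep(int(loop_timeout))
        except Exception as e:
            log_error.exception(e)
            input()
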
Example #7
import json
import logging
import math
import multiprocessing
import os
from datetime import datetime
from itertools import chain, count, product
from multiprocessing.dummy import Pool as ThreadPool
from random import randrange

import requests

from helpers.main_helper import (check_for_dupe_file, export_archive,
                                 format_directory, format_image,
                                 format_media_set, get_directory, json_request,
                                 reformat, setup_logger)

log_download = setup_logger('downloads', 'downloads.log')
log_error = setup_logger('errors', 'errors.log')

# Open config.json and fill in OPTIONAL information
path = os.path.join('.settings', 'config.json')
json_config = json.load(open(path))
json_global_settings = json_config["settings"]
multithreading = json_global_settings["multithreading"]
json_settings = json_config["supported"]["stars_avn"]["settings"]
auto_choice = json_settings["auto_choice"]
j_directory = get_directory(json_settings['directory'])
format_path = json_settings['file_name_format']
overwrite_files = json_settings["overwrite_files"]
date_format = json_settings["date_format"]
ignored_keywords = json_settings["ignored_keywords"]
ignore_unfollowed_accounts = json_settings["ignore_unfollowed_accounts"]
export_metadata = json_settings["export_metadata"]
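
Example #7 imports ThreadPool and reads a multithreading flag, which implies the per-item download calls seen in Examples #1 and #4 (x.download_media(*arg)) are fanned out across a worker pool. A generic sketch of that pattern; pool_download itself and the thread count are hypothetical:

from multiprocessing.dummy import Pool as ThreadPool


def pool_download(media_set, download_media, multithreading=True, threads=8):
    # Sketch: fan each argument tuple out to download_media, in parallel
    # when the multithreading setting is enabled.
    if multithreading:
        pool = ThreadPool(threads)
        try:
            results = pool.starmap(download_media, media_set)
        finally:
            pool.close()
            pool.join()
    else:
        results = [download_media(*args) for args in media_set]
    return results
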