def thread_function(q: Queue, thread_lock: threading.Lock, count: list, total: int, client: StashInterface):
    log.LogDebug(f"Created {threading.current_thread().name}")
    # Pull items with get_nowait() rather than checking q.empty() first:
    # with several workers the queue can drain between the check and the get.
    while True:
        try:
            image = q.get_nowait()
        except Empty:  # requires `from queue import Empty`
            break

        image_data = {
            'id': image.get('id'),
            'title': image.get('title')
        }
        if image.get('rating'):
            image_data['rating'] = image.get('rating')
        if image.get('studio'):
            image_data['studio_id'] = image.get('studio').get('id')
        if image.get('performers'):
            performer_ids = [p.get('id') for p in image.get('performers')]
            image_data['performer_ids'] = performer_ids
        if image.get('tags'):
            tag_ids = [t.get('id') for t in image.get('tags')]
            image_data['tag_ids'] = tag_ids
        if image.get('galleries'):
            gallery_ids = [g.get('id') for g in image.get('galleries')]
            image_data['gallery_ids'] = gallery_ids

        client.updateImage(image_data)

        # `count` is a shared one-element list: a plain int argument would be
        # copied per thread, so increments would never be visible to the
        # other workers or the progress bar.
        with thread_lock:
            count[0] += 1
            log.LogProgress(count[0] / total)

        q.task_done()
    log.LogDebug(f"{threading.current_thread().name} finished")
    return True
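A minimal launcher sketch (an assumption, not part of the original plugin) showing how thread_function might be driven; count is a one-element list so every worker updates the same progress counter:

import threading
from queue import Queue

def run_image_workers(images, client, num_threads=4):
    # Hypothetical driver: queue up the images, share one lock and one
    # mutable counter, and let the workers drain the queue.
    q = Queue()
    for image in images:
        q.put(image)

    thread_lock = threading.Lock()
    count = [0]  # shared, mutable progress counter
    threads = [threading.Thread(target=thread_function,
                                args=(q, thread_lock, count, len(images), client))
               for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()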
Example #2
def doLongTask():
    total = 100
    upTo = 0

    log.LogInfo("Doing long task")
    while upTo < total:
        time.sleep(1)

        log.LogProgress(upTo / total)
        upTo += 1
Example #3
def __bulk_scrape_scene_url(client, scenes, delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of scraped scenes
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape scene with existing metadata
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i/total)

        if delay:
            # wait() sleeps as needed and returns the new request timestamp
            last_request = wait(delay, last_request, time.time())

        # Create dict with scene data
        scene_data = {
            'id': scene.get('id'),
        }

        # Extract the scraper ID if one is appended to the control tag (a
        # module-level plugin setting defined elsewhere), then scrape the scene
        if '_' in control_tag:
            scraper_id = control_tag.split('_')[-1]
            scraped_data = client.scrapeScene(scene_data, scraper_id)
        else:
            scraped_data = client.scrapeScene(scene_data)

        # No data has been found for this scene
        if scraped_data is None or not any(scraped_data.values()):
            log.LogInfo(f"Could not get data for scene {scene.get('id')}")
            continue

        # Create dict with scene data
        update_data = {
            'id': scene.get('id')
        }
        if scraped_data.get('url'):
            update_data['url'] = scraped_data.get('url')

        # Update scene with scraped scene data
        client.updateScene(update_data)
        log.LogDebug(f"Scraped data for scene {scene.get('id')}")
        count += 1

    return count
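Examples #3, #5, and #9 all call a wait() helper defined elsewhere in the plugin. A minimal sketch consistent with the call shape wait(delay, last_request, now), and with example #9 treating the return value as the new timestamp, could be:

import time

def wait(delay, last_request, now):
    # Hypothetical reconstruction of the plugin's wait() helper: sleep until
    # `delay` seconds have passed since `last_request`, then return the new
    # request timestamp.
    remaining = delay - (now - last_request)
    if remaining > 0:
        time.sleep(remaining)
    return time.time()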
Example #4
def read_urls_and_download():
    with open(os.path.join(plugin_folder, 'urls.txt'), 'r') as url_file:
        urls = url_file.readlines()
    downloaded = []
    total = len(urls)
    i = 0
    for url in urls:
        i += 1
        log.LogProgress(i / total)
        url = url.strip()
        if check_url_valid(url):
            download(url, downloaded)
    if os.path.isfile(downloaded_json):
        shutil.move(downloaded_json, downloaded_backup_json)
    with open(downloaded_json, 'w') as outfile:
        json.dump(downloaded, outfile)
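read_urls_and_download() also leans on module-level names (plugin_folder, downloaded_json, downloaded_backup_json) and on check_url_valid() and download() helpers defined elsewhere. A hypothetical version of the path constants, for context only:

import os

# Hypothetical values; the plugin defines its own paths elsewhere.
plugin_folder = os.path.dirname(__file__)
downloaded_json = os.path.join(plugin_folder, 'downloaded.json')
downloaded_backup_json = os.path.join(plugin_folder, 'downloaded.bak.json')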
Example #5
def __bulk_scrape(client,
                  scenes,
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    missing_scrapers = list()

    # Number of scraped scenes
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape if url not in missing_scrapers
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('url') is None or scene.get('url') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing url")
            continue
        if urlparse(scene.get("url")).netloc not in missing_scrapers:
            if delay:
                # Capture the returned timestamp, as in example #9
                last_request = wait(delay, last_request, time.time())
            scraped_data = client.scrapeSceneURL(scene.get('url'))
            # If result is null, add url to missing_scrapers
            if scraped_data is None:
                log.LogWarning(
                    f"Missing scraper for {urlparse(scene.get('url')).netloc}")
                missing_scrapers.append(urlparse(scene.get('url')).netloc)
                continue
            # No data has been found for this scene
            if not any(scraped_data.values()):
                log.LogInfo(f"Could not get data for scene {scene.get('id')}")
                continue

            # Create dict with scene data
            update_data = {'id': scene.get('id')}
            if scraped_data.get('title'):
                update_data['title'] = scraped_data.get('title')
            if scraped_data.get('details'):
                update_data['details'] = scraped_data.get('details')
            if scraped_data.get('date'):
                update_data['date'] = scraped_data.get('date')
            if scraped_data.get('image'):
                update_data['cover_image'] = scraped_data.get('image')
            if scraped_data.get('tags'):
                tag_ids = list()
                for tag in scraped_data.get('tags'):
                    if tag.get('stored_id'):
                        tag_ids.append(tag.get('stored_id'))
                    else:
                        if create_missing_tags and tag.get('name') != "":
                            # Capitalize each word
                            tag_name = " ".join(
                                x.capitalize()
                                for x in tag.get('name').split(" "))
                            log.LogInfo(f'Create missing tag: {tag_name}')
                            tag_id = client.createTagWithName(tag_name)
                            tag_ids.append(tag_id)
                if len(tag_ids) > 0:
                    update_data['tag_ids'] = tag_ids

            if scraped_data.get('performers'):
                performer_ids = list()
                for performer in scraped_data.get('performers'):
                    if performer.get('stored_id'):
                        performer_ids.append(performer.get('stored_id'))
                    else:
                        if create_missing_performers and performer.get(
                                'name') != "":
                            performer_name = " ".join(
                                x.capitalize()
                                for x in performer.get('name').split(" "))
                            log.LogInfo(
                                f'Create missing performer: {performer_name}')
                            performer_id = client.createPerformerByName(
                                performer_name)
                            performer_ids.append(performer_id)
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids

            if scraped_data.get('studio'):
                studio = scraped_data.get('studio')
                if studio.get('stored_id'):
                    update_data['studio_id'] = studio.get('stored_id')
                else:
                    if create_missing_studios:
                        studio_name = " ".join(
                            x.capitalize()
                            for x in studio.get('name').split(" "))
                        log.LogInfo(f'Creating missing studio {studio_name}')
                        studio_url = '{uri.scheme}://{uri.netloc}'.format(
                            uri=urlparse(scene.get('url')))
                        studio_id = client.createStudio(
                            studio_name, studio_url)
                        update_data['studio_id'] = studio_id

            # Update scene with scraped scene data
            client.updateScene(update_data)
            log.LogDebug(f"Scraped data for scene {scene.get('id')}")
            count += 1

    return count
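A hypothetical call site for this variant; the scene query is a placeholder, since fetching scenes depends on the plugin's own StashInterface wrapper:

# Hypothetical driver; findScenesByPathRegex() is the query method used in
# example #7 and serves only as a placeholder here.
scenes = client.findScenesByPathRegex(".*")
count = __bulk_scrape(client, scenes,
                      create_missing_performers=True,
                      create_missing_tags=True)
log.LogInfo(f"Scraped data for {count} scenes")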
Example #6
def __bulk_create_performer(client, scenes, create_missing_performers, parse_performer_pattern, delay):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of created performers
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # List all performers in database
    all_performers = client.listPerformers()

    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i/total)

        if scene.get('path') is None or scene.get('path') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing path")
            continue

        # Parse performer name from scene basename file path
        scene_basename = os.path.basename(scene['path'])
        log.LogInfo(f"Scene basename is: {scene_basename}")
        performer_regex = re.compile(parse_performer_pattern)
        parsed_performer_regex = performer_regex.search(scene_basename)
        if parsed_performer_regex is None:
            log.LogInfo(f"No Performer found Scene {scene.get('id')} filename")
            continue
        parsed_performer_name = ' '.join(parsed_performer_regex.groups())
        log.LogInfo(f"Parsed performer name is: {parsed_performer_name}")

        # If performer name successfully parsed from scene basename
        if parsed_performer_name:
            # Create dict with scene data
            update_data = {
                'id': scene.get('id')
            }

            # List all performers currently attached to scene
            scene_performers = [sp['name'].lower() for sp in scene['performers']]
            log.LogInfo(f"Current scene performers are: {scene_performers}")

            # Check if performer already attached to scene
            performer_ids = list()
            if parsed_performer_name.lower() in scene_performers:
                continue
            else:
                # Check if performer already exists in database
                for performer in all_performers:
                    if performer['name'] and parsed_performer_name.lower() == performer['name'].lower():
                        performer_ids.append(performer['id'])
                        break
                    # Aliases may be separated by commas or slashes
                    if performer['aliases'] and parsed_performer_name.lower() in [
                            a.strip().lower() for a in performer['aliases'].replace('/', ',').split(',')]:
                        performer_ids.append(performer['id'])
                        break
                else:
                    # Create performer if not in database
                    if create_missing_performers and parsed_performer_name != "":
                        performer_name = " ".join(x.capitalize() for x in parsed_performer_name.split(" "))
                        log.LogInfo(f'Create missing performer: {performer_name}')
                        performer_id = client.createPerformerByName(performer_name)
                        performer_ids.append(performer_id)
                        # Add newly created performer to all performers list
                        all_performers.append({'id':performer_id, 'name':performer_name, 'aliases':''})

                # Add found/created performer IDs to scene update data; skip
                # the update (and the count) when there is nothing to attach
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids
                    log.LogInfo(f"Performer IDs found: {performer_ids}")

                    # Update scene with parsed performer data
                    client.updateScene(update_data)
                    log.LogDebug(f"Updated performer data for scene {scene.get('id')}")
                    count += 1

    return count
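parse_performer_pattern is supplied by the caller. As an illustration (hypothetical, not from the original plugin), a pattern that captures a "First.Last" filename prefix behaves like this:

import re

# Hypothetical pattern: captures the two name parts of files such as
# "Jane.Doe.Some.Scene.mp4"; the function joins the groups with a space.
parse_performer_pattern = r'^([A-Za-z]+)\.([A-Za-z]+)\.'

match = re.compile(parse_performer_pattern).search("Jane.Doe.Some.Scene.mp4")
if match:
    print(' '.join(match.groups()))  # -> "Jane Doe"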
Example #7
def tag_scenes(client):
    endRegex = r'\.(?:[mM][pP]4|[wW][mM][vV])$'
    beginRegex = ".*("
    if not os.path.isfile(downloaded_json) and os.path.isfile(downloaded_backup_json):
        shutil.copyfile(downloaded_backup_json, downloaded_json)
    with open(downloaded_json) as json_file:
        data = json.load(json_file)
        # Build one alternation of all downloaded video IDs
        beginRegex += "|".join(video['id'] for video in data) + ").*"
        log.LogDebug(beginRegex + endRegex)
        # Query with the full pattern logged above, extension check included
        scenes = client.findScenesByPathRegex(beginRegex + endRegex)

        total = len(scenes)
        i = 0
        for scene in scenes:
            i += 1
            log.LogProgress(i/total)
            log.LogDebug(os.path.join("ScenePath", scene.get('path')))
            basename = os.path.basename(scene.get('path'))
            filename = os.path.splitext(basename)[0]

            found_video = None
            for video in data:
                if video['id'] in filename:
                    found_video = video
                    break
            if found_video is not None:
                scene_data = {
                    'id': scene.get('id'),
                    'url': found_video['url'],
                    'title': found_video['title']
                }

                # Required, would be cleared otherwise
                if scene.get('rating'):
                    scene_data['rating'] = scene.get('rating')

                tag_ids = [t.get('id') for t in scene.get('tags')]
                tag_ids.append(get_scrape_tag(client))
                scene_data['tag_ids'] = tag_ids

                performer_ids = [p.get('id') for p in scene.get('performers')]
                scene_data['performer_ids'] = performer_ids

                if scene.get('studio'):
                    scene_data['studio_id'] = scene.get('studio').get('id')

                if scene.get('gallery'):
                    scene_data['gallery_id'] = scene.get('gallery').get('id')

                client.updateScene(scene_data)
Example #8
def createPerformers(client):
    performers = client.listPerformers()
    performers_to_lookup = set()

    idx = 0
    while True:
        scenes = client.listScenes(idx)
        idx += 1
        if not scenes:
            break

        for scene in scenes:
            path = scene["path"]
            performers_in_scene = [s["name"].lower() for s in scene["performers"]]
            file_name = os.path.basename(path)
            file_name, _ = os.path.splitext(file_name)
            # Normalize separators so the NER model sees clean token boundaries
            file_name = file_name.replace("-", ",").replace(",", " , ")
            # `nlp` is a named-entity pipeline bound at module level
            # (see the spaCy sketch after this example)
            doc = nlp(file_name)
            performers_names = set()
            for w in doc.ents:
                if w.label_ == "PERSON":
                    performers_names.add(w.text.strip().title())

            if len(file_name.split()) == 2 and not any(
                char.isdigit() for char in file_name
            ):
                performers_names.add(file_name.strip().title())

            for p in performers_names:
                if (
                    p.lower() not in performers_in_scene
                    and p.lower() not in performers
                    and len(p.split()) != 1
                ):
                    performers_to_lookup.add(p)

    total = len(performers_to_lookup)
    total_added = 0
    log.LogInfo("Going to look up {} performers".format(total))

    for i, performer in enumerate(performers_to_lookup):
        log.LogInfo("Searching: " + performer)
        log.LogProgress(float(i) / float(total))
        try:
            data = client.findPerformer(performer)
        except Exception as e:
            log.LogError(str(e))
            continue

        # Add a little random sleep so we don't flood the services
        time.sleep(random.uniform(0.2, 1))
        if not data:
            continue

        if data.get('gender'):
            data["gender"] = data["gender"].upper()

        data = {k: v for k, v in data.items() if v is not None and v != ""}

        log.LogInfo("Adding: " + performer)
        try:
            client.createPerformer(data)
            total_added += 1
        except Exception as e:
            log.LogError(str(e))

    log.LogInfo("Added a total of {} performers".format(total_added))
    log.LogInfo("Done!")
Example #9
def __bulk_scrape(client,
                  entities: Dict[Entity, List[dict]],
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  create_missing_movies=False,
                  delay=5) -> None:
    last_request = -1
    # Unpack the entity dict and iterate over each type (scenes, galleries);
    # entities is non-empty and contains at least one non-empty entity type
    for entity_class, entity_array in entities.items():
        log.LogInfo(f"Scraping {entity_class.value}")
        # Fetch available url scrapers for entity type
        if entity_class is Entity.Scene:
            supported_scrapers = client.sceneScraperURLs()
        elif entity_class is Entity.Gallery:
            supported_scrapers = client.galleryScraperURLs()
        else:
            raise TypeError(f"Unexpected Entity type: {entity_class}")

        if delay > 0:
            # Initialize last request with current time + delay time
            last_request = time.time() + delay

        missing_scrapers = list()

        # Number of scraped scenes
        count = 0

        total = len(entity_array)
        # Index for progress bar
        i = 0

        # Scrape if url not in missing_scrapers
        for entity in entity_array:
            # Update status bar
            i += 1
            log.LogProgress(i / total)

            if entity.get('url') is None or entity.get('url') == "":
                # Skip the scene/gallery if it does not have an url
                log.LogInfo(
                    f"{entity_class.name} {entity.get('id')} is missing url")
                continue

            # URL domain with any leading "www." stripped
            url_netloc = urlparse(entity.get("url")).netloc.split('www.')[-1]
            if url_netloc not in missing_scrapers:
                if delay:
                    last_request = wait(delay, last_request, time.time())

                # The queries have different fields, so there cannot be a single scrapeURL function
                if entity_class is Entity.Scene:
                    scraped_data = client.scrapeSceneURL(entity.get('url'))
                elif entity_class is Entity.Gallery:
                    scraped_data = client.scrapeGalleryURL(entity.get('url'))
                else:
                    raise TypeError(f"Unexpected Entity type: {entity_class}")
                if scraped_data is None:
                    if url_netloc not in supported_scrapers:
                        # Result is null and the url has no supported scraper:
                        # remember the domain, which is faster than checking
                        # the supported-scrapers list on every request
                        log.LogWarning(
                            f"{entity_class.name} {entity.get('id')}: "
                            f"Missing scraper for {url_netloc}")
                        log.LogDebug(f"Full url: {entity.get('url')}")
                        missing_scrapers.append(url_netloc)
                    else:
                        log.LogInfo(
                            f"Could not scrape {entity_class.name.lower()} {entity.get('id')}"
                        )
                        log.LogDebug("Return data was None")
                    continue
                # No data has been found for this entity
                if not any(scraped_data.values()):
                    log.LogInfo(
                        f"Could not get data for {entity_class.name.lower()} {entity.get('id')}"
                    )
                    continue

                update_entity(
                    client=client,
                    entity=entity,
                    entity_type=entity_class,
                    scraped_data=scraped_data,
                    create_missing_tags=create_missing_tags,
                    create_missing_performers=create_missing_performers,
                    create_missing_studios=create_missing_studios,
                    create_missing_movies=create_missing_movies)

                log.LogDebug(
                    f"Scraped data for {entity_class.name.lower()} {entity.get('id')}"
                )
                count += 1

        log.LogInfo(f"Scraped data for {count} {entity_class.value}")