def thread_function(q: Queue, thread_lock: threading.Lock, count: int, total: int, client: StashInterface):
    log.LogDebug(f"Created {threading.current_thread().name}")
    while not q.empty():
        image = q.get()
        image_data = {
            'id': image.get('id'),
            'title': image.get('title')
        }
        if image.get('rating'):
            image_data['rating'] = image.get('rating')
        if image.get('studio'):
            image_data['studio_id'] = image.get('studio').get('id')
        if image.get('performers'):
            performer_ids = [p.get('id') for p in image.get('performers')]
            image_data['performer_ids'] = performer_ids
        if image.get('tags'):
            tag_ids = [t.get('id') for t in image.get('tags')]
            image_data['tag_ids'] = tag_ids
        if image.get('galleries'):
            gallery_ids = [g.get('id') for g in image.get('galleries')]
            image_data['gallery_ids'] = gallery_ids
        client.updateImage(image_data)
        # NOTE: count is passed by value, so each thread increments its own copy;
        # the reported progress is per-thread rather than a global total.
        with thread_lock:
            count += 1
            log.LogProgress(count / total)
        q.task_done()
    log.LogDebug(f"{threading.current_thread().name} finished")
    return True
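# A minimal sketch of how thread_function might be driven. This caller is not
# part of the original code: the function name, worker count, and queue setup
# are assumptions for illustration, and it relies on the same module-level
# `threading` and `Queue` imports as thread_function above.
def bulk_update_images(client, images, num_threads=4):
    q = Queue()
    for image in images:
        q.put(image)
    thread_lock = threading.Lock()
    threads = []
    for _ in range(num_threads):
        t = threading.Thread(target=thread_function,
                             args=(q, thread_lock, 0, len(images), client))
        t.start()
        threads.append(t)
    # Workers exit once the queue is drained
    for t in threads:
        t.join()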
def doLongTask():
    total = 100
    upTo = 0
    log.LogInfo("Doing long task")
    while upTo < total:
        time.sleep(1)
        log.LogProgress(float(upTo) / float(total))
        upTo = upTo + 1
def __bulk_scrape_scene_url(client, scenes, delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of scraped scenes
    count = 0
    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape scene with existing metadata
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if delay:
            wait(delay, last_request, time.time())

        # Create dict with scene data
        scene_data = {
            'id': scene.get('id'),
        }

        # Extract scraper ID if appended to control tag, then scrape scene
        # (control_tag is expected to be defined at module level)
        if '_' in control_tag:
            scraper_id = control_tag.split('_')[-1]
            scraped_data = client.scrapeScene(scene_data, scraper_id)
        else:
            scraped_data = client.scrapeScene(scene_data)

        # No data has been found for this scene
        if scraped_data is None or not any(scraped_data.values()):
            log.LogInfo(f"Could not get data for scene {scene.get('id')}")
            continue

        # Create dict with the fields to update
        update_data = {
            'id': scene.get('id')
        }
        if scraped_data.get('url'):
            update_data['url'] = scraped_data.get('url')

        # Update scene with scraped scene data
        client.updateScene(update_data)
        log.LogDebug(f"Scraped data for scene {scene.get('id')}")
        count += 1

    return count
def read_urls_and_download():
    with open(os.path.join(plugin_folder, 'urls.txt'), 'r') as url_file:
        urls = url_file.readlines()

    downloaded = []
    total = len(urls)
    i = 0
    for url in urls:
        i += 1
        log.LogProgress(i / total)
        if check_url_valid(url.strip()):
            download(url.strip(), downloaded)

    if os.path.isfile(downloaded_json):
        shutil.move(downloaded_json, downloaded_backup_json)
    with open(downloaded_json, 'w') as outfile:
        json.dump(downloaded, outfile)
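# The check_url_valid() helper used above is not shown here. A hypothetical,
# minimal version could simply verify that the line parses as an http(s) URL,
# reusing the module-level urlparse import relied on by the scraping functions
# below; the real helper may apply stricter checks (supported hosts, duplicates).
def check_url_valid(url):
    parsed = urlparse(url)
    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)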
def __bulk_scrape(client, scenes, create_missing_performers=False, create_missing_tags=False, create_missing_studios=False, delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    missing_scrapers = list()

    # Number of scraped scenes
    count = 0
    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape if url not in missing_scrapers
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('url') is None or scene.get('url') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing url")
            continue
        if urlparse(scene.get("url")).netloc not in missing_scrapers:
            if delay:
                wait(delay, last_request, time.time())
            scraped_data = client.scrapeSceneURL(scene.get('url'))

            # If result is null, add url to missing_scrapers
            if scraped_data is None:
                log.LogWarning(
                    f"Missing scraper for {urlparse(scene.get('url')).netloc}")
                missing_scrapers.append(urlparse(scene.get('url')).netloc)
                continue
            # No data has been found for this scene
            if not any(scraped_data.values()):
                log.LogInfo(f"Could not get data for scene {scene.get('id')}")
                continue

            # Create dict with scene data
            update_data = {'id': scene.get('id')}
            if scraped_data.get('title'):
                update_data['title'] = scraped_data.get('title')
            if scraped_data.get('details'):
                update_data['details'] = scraped_data.get('details')
            if scraped_data.get('date'):
                update_data['date'] = scraped_data.get('date')
            if scraped_data.get('image'):
                update_data['cover_image'] = scraped_data.get('image')
            if scraped_data.get('tags'):
                tag_ids = list()
                for tag in scraped_data.get('tags'):
                    if tag.get('stored_id'):
                        tag_ids.append(tag.get('stored_id'))
                    else:
                        if create_missing_tags and tag.get('name') != "":
                            # Capitalize each word
                            tag_name = " ".join(
                                x.capitalize() for x in tag.get('name').split(" "))
                            log.LogInfo(f'Create missing tag: {tag_name}')
                            tag_id = client.createTagWithName(tag_name)
                            tag_ids.append(tag_id)
                if len(tag_ids) > 0:
                    update_data['tag_ids'] = tag_ids
            if scraped_data.get('performers'):
                performer_ids = list()
                for performer in scraped_data.get('performers'):
                    if performer.get('stored_id'):
                        performer_ids.append(performer.get('stored_id'))
                    else:
                        if create_missing_performers and performer.get('name') != "":
                            performer_name = " ".join(
                                x.capitalize() for x in performer.get('name').split(" "))
                            log.LogInfo(
                                f'Create missing performer: {performer_name}')
                            performer_id = client.createPerformerByName(
                                performer_name)
                            performer_ids.append(performer_id)
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids
            if scraped_data.get('studio'):
                studio = scraped_data.get('studio')
                if studio.get('stored_id'):
                    update_data['studio_id'] = studio.get('stored_id')
                else:
                    if create_missing_studios:
                        studio_name = " ".join(
                            x.capitalize() for x in studio.get('name').split(" "))
                        log.LogInfo(f'Creating missing studio {studio_name}')
                        studio_url = '{uri.scheme}://{uri.netloc}'.format(
                            uri=urlparse(scene.get('url')))
                        studio_id = client.createStudio(studio_name, studio_url)
                        update_data['studio_id'] = studio_id

            # Update scene with scraped scene data
            client.updateScene(update_data)
            log.LogDebug(f"Scraped data for scene {scene.get('id')}")
            count += 1

    return count
def __bulk_create_performer(client, scenes, create_missing_performers, parse_performer_pattern, delay):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of created performers
    count = 0
    total = len(scenes)
    # Index for progress bar
    i = 0

    # List all performers in database
    all_performers = client.listPerformers()

    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('path') is None or scene.get('path') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing path")
            continue

        # Parse performer name from scene basename file path
        scene_basename = os.path.basename(scene['path'])
        log.LogInfo(f"Scene basename is: {scene_basename}")
        performer_regex = re.compile(parse_performer_pattern)
        parsed_performer_regex = performer_regex.search(scene_basename)
        if parsed_performer_regex is None:
            log.LogInfo(f"No performer found in scene {scene.get('id')} filename")
            continue
        parsed_performer_name = ' '.join(parsed_performer_regex.groups())
        log.LogInfo(f"Parsed performer name is: {parsed_performer_name}")

        # If performer name successfully parsed from scene basename
        if parsed_performer_name:
            # Create dict with scene data
            update_data = {
                'id': scene.get('id')
            }

            # List all performers currently attached to scene
            scene_performers = [sp['name'].lower() for sp in scene['performers']]
            log.LogInfo(f"Current scene performers are: {scene_performers}")

            # Check if performer already attached to scene
            performer_ids = list()
            if parsed_performer_name.lower() in scene_performers:
                continue
            else:
                # Check if performer already exists in database
                for performer in all_performers:
                    if performer['name'] and parsed_performer_name.lower() == performer['name'].lower():
                        performer_ids.append(performer['id'])
                        break
                    if performer['aliases'] and parsed_performer_name.lower() in [p.strip().lower() for p in performer['aliases'].replace('/', ',').split(',')]:
                        performer_ids.append(performer['id'])
                        break
                else:
                    # Create performer if not in database
                    if create_missing_performers and parsed_performer_name != "":
                        performer_name = " ".join(x.capitalize() for x in parsed_performer_name.split(" "))
                        log.LogInfo(f'Create missing performer: {performer_name}')
                        performer_id = client.createPerformerByName(performer_name)
                        performer_ids.append(performer_id)
                        # Add newly created performer to all performers list
                        all_performers.append({'id': performer_id, 'name': performer_name, 'aliases': ''})

            # Add found/created performer IDs to scene update data
            if len(performer_ids) > 0:
                update_data['performer_ids'] = performer_ids
                log.LogInfo(f"Performer IDs found: {performer_ids}")
                # Update scene with parsed performer data
                client.updateScene(update_data)
                log.LogDebug(f"Updated performer data for scene {scene.get('id')}")
                count += 1

    return count
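# Example (hypothetical) value for parse_performer_pattern above: the regex
# capture groups are joined with spaces to form the performer name, so a
# pattern like this would turn "Jane.Doe.Some.Title.mp4" into "Jane Doe".
example_parse_performer_pattern = r'^([A-Z][a-z]+)\.([A-Z][a-z]+)\.'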
def tag_scenes(client):
    endRegex = r'\.(?:[mM][pP]4|[wW][mM][vV])$'
    beginRegex = ".*("
    if not os.path.isfile(downloaded_json) and os.path.isfile(downloaded_backup_json):
        shutil.copyfile(downloaded_backup_json, downloaded_json)
    with open(downloaded_json) as json_file:
        data = json.load(json_file)

    for i in range(0, len(data)):
        if i < len(data) - 1:
            beginRegex += data[i]['id'] + "|"
        else:
            beginRegex += data[i]['id'] + ").*"
    log.LogDebug(beginRegex + endRegex)

    scenes = client.findScenesByPathRegex(beginRegex)
    total = len(scenes)
    i = 0
    for scene in scenes:
        i += 1
        log.LogProgress(i / total)
        log.LogDebug(os.path.join("ScenePath", scene.get('path')))
        basename = os.path.basename(scene.get('path'))
        filename = os.path.splitext(basename)[0]
        found_video = None
        for video in data:
            if video['id'] in filename:
                found_video = video
                break
        if found_video is not None:
            scene_data = {
                'id': scene.get('id'),
                'url': found_video['url'],
                'title': found_video['title']
            }
            # Required, would be cleared otherwise
            if scene.get('rating'):
                scene_data['rating'] = scene.get('rating')
            tag_ids = []
            for t in scene.get('tags'):
                tag_ids.append(t.get('id'))
            tag_ids.append(get_scrape_tag(client))
            scene_data['tag_ids'] = tag_ids
            performer_ids = []
            for p in scene.get('performers'):
                performer_ids.append(p.get('id'))
            scene_data['performer_ids'] = performer_ids
            if scene.get('studio'):
                scene_data['studio_id'] = scene.get('studio').get('id')
            if scene.get('gallery'):
                scene_data['gallery_id'] = scene.get('gallery').get('id')
            client.updateScene(scene_data)
def createPerformers(client):
    performers = client.listPerformers()
    performers_to_lookup = set()

    idx = 0
    while True:
        scenes = client.listScenes(idx)
        idx += 1
        if not scenes:
            break
        for scene in scenes:
            path = scene["path"]
            performers_in_scene = [s["name"].lower() for s in scene["performers"]]
            file_name = os.path.basename(path)
            file_name, _ = os.path.splitext(file_name)
            file_name = file_name.replace("-", ",").replace(",", " , ")
            doc = nlp(file_name)
            performers_names = set()
            for w in doc.ents:
                if w.label_ == "PERSON":
                    performers_names.add(w.text.strip().title())
            if len(file_name.split()) == 2 and not any(
                char.isdigit() for char in file_name
            ):
                performers_names.add(file_name.strip().title())
            for p in performers_names:
                if (
                    p.lower() not in performers_in_scene
                    and p.lower() not in performers
                    and len(p.split()) != 1
                ):
                    performers_to_lookup.add(p)

    total = len(performers_to_lookup)
    total_added = 0
    log.LogInfo("Going to look up {} performers".format(total))

    for i, performer in enumerate(performers_to_lookup):
        log.LogInfo("Searching: " + performer)
        log.LogProgress(float(i) / float(total))
        try:
            data = client.findPerformer(performer)
        except Exception as e:
            log.LogError(str(e))
            continue
        # Add a little random sleep so we don't flood the services
        time.sleep(random.uniform(0.2, 1))
        if not data:
            continue
        if data.get('gender'):
            data["gender"] = data["gender"].upper()
        data = {k: v for k, v in data.items() if v is not None and v != ""}
        log.LogInfo("Adding: " + performer)
        try:
            client.createPerformer(data)
            total_added += 1
        except Exception as e:
            log.LogError(str(e))

    log.LogInfo("Added a total of {} performers".format(total_added))
    log.LogInfo("Done!")
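# createPerformers() relies on a module-level spaCy pipeline named `nlp` for
# PERSON entity detection. A minimal setup might look like this; the model name
# is an assumption, and any spaCy model with an NER component would work.
import spacy
nlp = spacy.load("en_core_web_sm")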
def __bulk_scrape(client,
                  entities: Dict[Entity, array.array],
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  create_missing_movies=False,
                  delay=5) -> None:
    last_request = -1

    # Unpack entity dict and iterate over each type (scenes, galleries).
    # entities is non-empty and contains at least one non-empty entity type.
    for entity_class, entity_array in entities.items():
        log.LogInfo(f"Scraping {entity_class.value}")

        # Fetch available url scrapers for entity type
        if entity_class is Entity.Scene:
            supported_scrapers = client.sceneScraperURLs()
        elif entity_class is Entity.Gallery:
            supported_scrapers = client.galleryScraperURLs()
        else:
            raise TypeError(f"Unexpected Entity type: {entity_class}")

        if delay > 0:
            # Initialize last request with current time + delay time
            last_request = time.time() + delay

        missing_scrapers = list()

        # Number of scraped entities of this type
        count = 0
        total = len(entity_array)
        # Index for progress bar
        i = 0

        # Scrape if url not in missing_scrapers
        for entity in entity_array:
            # Update status bar
            i += 1
            log.LogProgress(i / total)

            if entity.get('url') is None or entity.get('url') == "":
                # Skip the scene/gallery if it does not have an url
                log.LogInfo(
                    f"{entity_class.name} {entity.get('id')} is missing url")
                continue

            # URL domain name (without leading www.)
            url_netloc = urlparse(entity.get("url")).netloc.split('www.')[-1]
            if url_netloc not in missing_scrapers:
                if delay:
                    last_request = wait(delay, last_request, time.time())

                # The query has different fields, so there cannot be a single scrapeURL function
                if entity_class is Entity.Scene:
                    scraped_data = client.scrapeSceneURL(entity.get('url'))
                elif entity_class is Entity.Gallery:
                    scraped_data = client.scrapeGalleryURL(entity.get('url'))
                else:
                    raise TypeError(f"Unexpected Entity type: {entity_class}")

                if scraped_data is None:
                    if url_netloc not in supported_scrapers:
                        # If result is null and url is not in the list of supported
                        # scrapers, add url to missing_scrapers. Faster than checking
                        # the list of supported scrapers every time.
                        log.LogWarning(
                            f"{entity_class.name} {entity.get('id')}: "
                            f"Missing scraper for {url_netloc}")
                        log.LogDebug(f"Full url: {entity.get('url')}")
                        missing_scrapers.append(url_netloc)
                    else:
                        log.LogInfo(
                            f"Could not scrape {entity_class.name.lower()} {entity.get('id')}"
                        )
                        log.LogDebug("Return data was None")
                    continue

                # No data has been found for this entity
                if not any(scraped_data.values()):
                    log.LogInfo(
                        f"Could not get data for {entity_class.name.lower()} {entity.get('id')}"
                    )
                    continue

                update_entity(
                    client=client,
                    entity=entity,
                    entity_type=entity_class,
                    scraped_data=scraped_data,
                    create_missing_tags=create_missing_tags,
                    create_missing_performers=create_missing_performers,
                    create_missing_studios=create_missing_studios,
                    create_missing_movies=create_missing_movies)

                log.LogDebug(
                    f"Scraped data for {entity_class.name.lower()} {entity.get('id')}"
                )
                count += 1

        log.LogInfo(f"Scraped data for {count} {entity_class.value}")
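# Sketch of the wait() helper assumed by the scraping loops above: it blocks
# until `delay` seconds have passed since `last_request` and returns the new
# reference time, matching the `last_request = wait(...)` usage in the last
# function. The real implementation may differ.
def wait(delay, last_request, now):
    elapsed = now - last_request
    if elapsed < delay:
        time.sleep(delay - elapsed)
    return time.time()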