def load_terms(df: DataFrame) -> int:
    """Load dictionary terms from *df* into the ``Term`` table.

    De-duplicates the dataframe, lower-cases terms flagged as
    case-insensitive, bulk-creates only terms that do not already exist,
    then refreshes the term-stem cache for every existing project
    configuration.

    :param df: dataframe with "Term", "Case Sensitive", "Term Category"
        and "Term Locale" columns.
    :return: number of rows in the de-duplicated dataframe.
    """
    df.drop_duplicates(inplace=True)
    # Lower-case case-insensitive terms so their duplicates collapse in the
    # subset dedup below.  `== False` is an elementwise pandas mask, not a
    # truthiness test (the column may hold non-bool values).  # noqa: E712
    insensitive = df["Case Sensitive"] == False
    df.loc[insensitive, "Term"] = df.loc[insensitive, "Term"].str.lower()
    df = df.drop_duplicates(subset="Term").dropna(subset=["Term"])

    # Collect (term, category, locale) tuples once so existence can be
    # checked with a single query instead of one .exists() round-trip per
    # row (N+1 fix).
    rows = [(row["Term"].strip(), row["Term Category"], row["Term Locale"])
            for _, row in df.iterrows()]
    existing = set(
        Term.objects.filter(term__in=[t for t, _, _ in rows])
            .values_list('term', flat=True))

    terms = []
    for term, category, locale in rows:
        if term not in existing:
            lt = Term()
            lt.term = term
            lt.source = category
            lt.definition_url = locale
            terms.append(lt)

    # cache "global" term stems step - should be cached here via model manager
    Term.objects.bulk_create(terms)

    # update existing ProjectTermConfiguration objects for all projects across loaded terms
    from apps.extract.dict_data_cache import cache_term_stems
    from apps.project.models import ProjectTermConfiguration
    for config in ProjectTermConfiguration.objects.all():
        cache_term_stems(config.project_id)
        config.add(terms)
    return len(df)
def cache_term(instance, **kwargs):
    """Signal handler fired when a term is saved.

    Rebuilds the global term-stem cache, then the cache of every project
    whose term configuration references this term.
    """
    from apps.extract.dict_data_cache import cache_term_stems

    # Global cache first (no project id argument).
    cache_term_stems()

    # Per-project caches for every configuration linked to this term.
    linked_projects = instance.projecttermconfiguration_set.values_list(
        'project_id', flat=True)
    for pid in linked_projects:
        cache_term_stems(pid)
def upload_df(self, df: pd.DataFrame) -> None:
    """Load terms from *df* unless the Term table is already populated."""
    if Term.objects.exists():
        print('Terms data already uploaded')
        return

    print('Uploading terms...')
    # Load all rows atomically so a partial upload never persists.
    with transaction.atomic():
        terms_count = load_terms(df)
    print('Detected %d terms' % terms_count)
    print('Caching terms config for Locate tasks...')
    dict_data_cache.cache_term_stems()
def terms_loader(zip_file: ZipFile, files: list) -> None:
    """Load terms from CSV files inside *zip_file*, skipping the upload
    entirely if the Term table already has data."""
    if Term.objects.exists():
        print('Terms data already uploaded')
        return

    print('Uploading terms...')
    frame = load_csv_files(zip_file, files)
    # Insert all terms in a single transaction so a failure rolls back cleanly.
    with transaction.atomic():
        terms_count = load_terms(frame)
    print('Detected %d terms' % terms_count)
    print('Caching terms config for Locate tasks...')
    dict_data_cache.cache_term_stems()
def cache_terms(instance, action, pk_set, **kwargs):
    """m2m_changed handler: refresh a project's term-stem cache.

    Acts only on post_* actions with a non-empty ``pk_set`` — i.e. only
    when the project's term set actually changed.
    """
    # Guard clause: ignore pre_* phases and no-op changes (empty pk_set).
    if not action.startswith('post') or not pk_set:
        return
    from apps.extract.dict_data_cache import cache_term_stems
    cache_term_stems(instance.project.pk)
def delete_cached_term(instance, **kwargs):
    """Signal handler fired when a term is deleted: rebuild the global
    term-stem cache (no project id -> global scope)."""
    from apps.extract import dict_data_cache
    dict_data_cache.cache_term_stems()
def bulk_create(self, objs, **kwargs):
    """Bulk-create term objects, then refresh the global cached term stems.

    This override exists so terms loaded via fixtures (which bypass save
    signals) still update the global cache.

    Fix: the original discarded ``super().bulk_create(...)``'s return
    value; Django's manager contract is to return the created objects, so
    we now propagate it (backward compatible — previous callers got None
    and ignored it).
    """
    created = super().bulk_create(objs, **kwargs)
    from apps.extract.dict_data_cache import cache_term_stems
    cache_term_stems()
    return created
def cache_term_stems(apps, schema_editor):
    # Data-migration hook (RunPython signature): rebuild the global
    # term-stem cache.  `apps` and `schema_editor` are required by the
    # RunPython calling convention but are intentionally unused here.
    dict_data_cache.cache_term_stems()