def main(model_identifier):
    """Download a model's config from S3, let the maintainer patch it, and upload it back.

    Args:
        model_identifier: hub model id, e.g. "org/model-name".

    Raises:
        IndexError: if no hub model matches ``model_identifier``.
    """
    api = HfApi()
    model_list = api.model_list()
    model_dict = [
        model_dict for model_dict in model_list if model_dict.modelId == model_identifier
    ][0]
    # Flatten "org/name" into "org_name" so the id is usable as a file name.
    model_identifier = "_".join(model_identifier.split("/"))
    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = "./{}_config.json".format(model_identifier)

    # Fetch the remote config to a local temp file.
    bash_command = "curl {} > {}".format(config_path_aws, file_name)
    os.system(bash_command)
    with open(file_name) as f:
        config_json = json.load(f)
    # Remove the download; the (possibly edited) config is rewritten below.
    # os.remove instead of shelling out to `rm`: portable and no shell involved.
    os.remove(file_name)

    ##### HERE YOU SHOULD STATE WHICH PARAMS WILL BE CHANGED #####
    # e.g. config_json["decoder_start_token_id"] = 2

    # save config as it was saved before
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)

    # upload new config
    bash_command = "aws s3 cp {} s3://{}".format(file_name, hf_url + model_dict.key)
    os.system(bash_command)
def update_config(model_identifier, updates):
    """Download a model's hub config, merge in ``updates``, and upload it back.

    Args:
        model_identifier: hub model id, e.g. "org/model-name".
        updates: dict of config keys/values merged into the downloaded config.

    Raises:
        IndexError: if no hub model matches ``model_identifier``.
    """
    api = HfApi()
    model_list = api.model_list()
    model_dict = [
        model_dict for model_dict in model_list if model_dict.modelId == model_identifier
    ][0]
    # Flatten "org/name" into "org_name" so the id is usable as a file name.
    model_identifier = "_".join(model_identifier.split("/"))
    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = f"./{model_identifier}_config.json"

    # Fetch the remote config to a local temp file.
    bash_command = f"curl {config_path_aws} > {file_name}"
    os.system(bash_command)
    with open(file_name) as f:
        config_json = json.load(f)
    # Remove the download; the patched config is rewritten below.
    # os.remove instead of shelling out to `rm`: portable and no shell involved.
    os.remove(file_name)

    ##### HERE YOU SHOULD STATE WHICH PARAMS WILL BE CHANGED #####
    config_json.update(updates)

    # save config as it was saved before
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)

    # upload new config
    bash_command = f"s3cmd cp {file_name} s3://{hf_url + model_dict.key}"
    os.system(bash_command)
def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
    """Find models that can accept src_lang as input and return tgt_lang as output."""
    prefix = "Helsinki-NLP/opus-mt-"
    model_ids = [
        entry.modelId
        for entry in HfApi().model_list()
        if entry.modelId.startswith("Helsinki-NLP")
    ]
    # Ids containing '+' cannot be loaded, so drop them before parsing.
    src_and_targ = [
        remove_prefix(mid, prefix).lower().split("-")
        for mid in model_ids
        if "+" not in mid
    ]
    return [
        f"{prefix}{src}-{tgt}"
        for (src, tgt) in src_and_targ
        if src_lang in src and tgt_lang in tgt
    ]
def clean_all_community_configs(model_list=None, do_upload=False, do_delete=True):
    """Round-trip every community config through AutoConfig and flag diffs.

    For each hub model (optionally filtered to ``model_list``), load its config
    with AutoConfig, re-save it locally, reload the saved copy, and compare.
    A mismatch means the stored config differs from what transformers would
    serialize today.

    Args:
        model_list: optional list of model ids to restrict the scan to.
        do_upload: if True, push the re-saved config back to S3.
        do_delete: if True, remove the local temp directory afterwards.
    """
    api = HfApi()
    model_dict_list = api.model_list()
    if model_list is not None:
        model_dict_list = [model_dict for model_dict in model_dict_list if model_dict.modelId in model_list]
    for i, model_dict in enumerate(model_dict_list):
        model_identifier = model_dict.modelId
        hf_url = 'models.huggingface.co/'
        # "org/name" -> "org_name" so the id is usable as a file name.
        local_model_path = '_'.join(model_identifier.split('/'))
        path_to_config = "./{}_config.json".format(local_model_path)
        print("\n{}: Summary for {}".format(i, model_identifier))
        print(50 * '-')
        try:
            config = AutoConfig.from_pretrained(model_identifier)
        except Exception as e:  # noqa: E722
            # Report unloadable configs and continue scanning instead of aborting.
            print('CONF ERROR: {} config can not be loaded'.format(model_identifier))
            print('Message: {}'.format(e))
            print(50 * '=')
            continue
        # create temp dir
        temp_dir = path_to_config.split('.json')[0]
        if os.path.exists(temp_dir):
            os.system('rm -r {}'.format(temp_dir))
        os.mkdir(temp_dir)
        # save config locally to only use diff
        config.save_pretrained(temp_dir)
        diff_config = AutoConfig.from_pretrained(os.path.join(temp_dir, 'config.json'))
        if config != diff_config:
            # Re-saving changed the config: the hub copy is stale relative to
            # the current transformers serialization.
            print("Author: {} needs to be notified about changed conf: {}".format(model_dict.author, model_identifier))
            if do_upload is True:
                # upload new config
                bash_command = 'aws s3 cp {} s3://{}'.format(os.path.join(temp_dir, 'config.json'), hf_url + model_dict.key)
                os.system(bash_command)
        # delete saved config
        if do_delete is True:
            os.system('rm -r {}'.format(temp_dir))
        print(50 * '=')
def test_model_names(self):
    """All org model ids must be loadable: ids containing '+' cannot be loaded."""
    model_list = HfApi().model_list()
    model_ids = [
        x.modelId
        for x in model_list
        if x.modelId.startswith(ORG_NAME)
    ]
    # Bug fix: test each id for '+', not membership in the whole list
    # ("+" in model_ids was always False, making the check a no-op).
    bad_model_ids = [mid for mid in model_ids if "+" in mid]
    self.assertListEqual([], bad_model_ids)
    self.assertGreater(len(model_ids), 500)
def change_model_list(change_fn, model_list=None, do_upload=False, key_word=None):
    """Apply ``change_fn`` to the hub config of every matching model.

    Args:
        change_fn: callable taking the config dict and returning the modified dict.
        model_list: optional list of model ids to restrict the run to.
        do_upload: if True, push the changed config back to S3.
        key_word: optional substring filter on model ids.
    """
    api = HfApi()
    model_dict_list = api.model_list()
    if model_list is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list if model_dict.modelId in model_list
        ]
    if key_word is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list if key_word in model_dict.modelId
        ]
    for model_dict in model_dict_list:
        model_identifier = model_dict.modelId
        # Bug fix: print the model id, not the literal string "model_identifier".
        print(model_identifier)
        http = 'https://s3.amazonaws.com/'
        hf_url = 'models.huggingface.co/'
        config_path_aws = http + hf_url + model_dict.key
        # "org/name" -> "org_name" so the id is usable as a file name.
        model_identifier = '_'.join(model_identifier.split('/'))
        path_to_config, config_json = download(config_path_aws, model_identifier)

        config_json = change_fn(config_json)

        # save config as it was saved before
        with open(path_to_config, 'w') as f:
            json.dump(config_json, f, indent=2, sort_keys=True)

        if do_upload is True:
            # upload new config
            bash_command = 'aws s3 cp {} s3://{}'.format(
                path_to_config, hf_url + model_dict.key)
            os.system(bash_command)

        # delete saved config; os.remove instead of shelling out to `rm`.
        os.remove(path_to_config)
def available(self):
    """
    Query the Hugging Face API for the available Helsinki-NLP language models.

    Returns:
        set of available language model ids
    """
    hub_models = HfApi().model_list()
    return {
        info.modelId
        for info in hub_models
        if info.modelId.startswith("Helsinki-NLP")
    }
def test_hub_configs(self):
    """I put require_torch_and_cuda cause I only want this to run with self-scheduled."""
    org = "sshleifer"
    allowed_to_be_broken = ["sshleifer/blenderbot-3B", "sshleifer/blenderbot-90M"]
    hub_models = HfApi().model_list()
    candidate_ids = [info.modelId for info in hub_models if info.modelId.startswith(org)]
    failures = []
    for model_id in candidate_ids:
        # Known-broken models are skipped rather than failing the suite.
        if model_id in allowed_to_be_broken:
            continue
        try:
            AutoConfig.from_pretrained(model_id)
        except Exception:
            failures.append(model_id)
    assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"
def fit(self, inputs=None, wait=True, logs="All", job_name=None, experiment_config=None):
    """Run the training job and, if a hub token is set, push the model to the HF hub.

    Args:
        inputs: training input channels forwarded to the parent ``fit``.
        wait: if True, block until the job finishes (hub upload requires this).
        logs: log streaming mode forwarded to the parent ``fit``.
        job_name: optional training job name.
        experiment_config: optional experiment configuration.
    """
    if self.huggingface_token and wait is True:
        logger.info(f"creating repository {self.base_job_name} on the HF hub")
        self.repo_url = HfApi().create_repo(token=self.huggingface_token, name=self.base_job_name)

    # parent fit method
    super(HuggingFace, self).fit(inputs, wait, logs, job_name, experiment_config)

    if self.huggingface_token and wait is True:
        logger.info(f"downloading model to {self.latest_training_job.name}/ ")
        self.download_model(".", True)
        # Bug fix: corrected "initalizing" typo and dropped the pointless
        # f-string prefix on a message with no placeholders.
        logger.info("initializing model repository")
        model_repo = HfRepository(
            repo_url=self.repo_url,
            huggingface_token=self.huggingface_token,
            model_dir=f"./{self.latest_training_job.name}",
        )
        model_repo.init_new_repository()
        logger.info("uploading model files to HF hub")
        model_repo.commit_files_and_push_to_hub()
def __init__(self, args):
    """Keep the parsed CLI args and create the hub API client."""
    self._api = HfApi()
    self.args = args
class HfApiCommonTest(unittest.TestCase):
    # Shared API client pointed at the staging endpoint so the test suite
    # never touches production data.
    _api = HfApi(endpoint=ENDPOINT_STAGING)
def test_model_list(self):
    """The public hub should list a substantial number of models as ModelInfo."""
    models = HfApi().model_list()
    self.assertGreater(len(models), 100)
    self.assertIsInstance(models[0], ModelInfo)
def test_staging_model_list(self):
    """Smoke test: listing models against the staging endpoint must not raise."""
    staging_api = HfApi(endpoint=ENDPOINT_STAGING)
    staging_api.model_list()
def get_all_model_paths():
    """Return the model ids of every model currently listed on the hub."""
    return [info.modelId for info in HfApi().model_list()]
class HfApiCommonTest(unittest.TestCase):
    # Shared API client aimed at staging so tests never touch production.
    # NOTE(review): the staging URL is hard-coded here; presumably it should
    # reference a shared ENDPOINT_STAGING constant — confirm.
    _api = HfApi(endpoint="https://moon-staging.huggingface.co")
def test_model_count(self):
    """Check the live hub count of Helsinki-NLP models against a fixed number."""
    # NOTE(review): asserting an exact live count (1011) is brittle — it will
    # break whenever models are added or removed upstream.
    helsinki_models = [
        info
        for info in HfApi().model_list()
        if info.modelId.startswith("Helsinki-NLP")
    ]
    self.assertEqual(1011, len(helsinki_models))