def update_config(model_identifier, updates):
    """Download a model's config from S3, merge `updates` into it, and upload it back.

    Args:
        model_identifier: Hub model id, e.g. "org/model-name".
        updates: dict of config keys/values merged into the existing config
            via `dict.update`.

    Raises:
        ValueError: if `model_identifier` is not listed on the Hub.
    """
    api = HfApi()
    model_list = api.model_list()
    # Fail with a clear message instead of a bare IndexError when the id is unknown.
    model_dict = next((m for m in model_list if m.modelId == model_identifier), None)
    if model_dict is None:
        raise ValueError(f"Model {model_identifier} not found on the Hub")
    model_identifier = "_".join(model_identifier.split("/"))
    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = f"./{model_identifier}_config.json"
    os.system(f"curl {config_path_aws} > {file_name}")
    with open(file_name) as f:
        config_json = json.load(f)
    # Apply the requested parameter changes.
    config_json.update(updates)
    # save config as it was saved before (no need to `rm` first —
    # opening with "w" truncates the file)
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)
    # upload new config; use `aws s3 cp` for consistency with the other
    # helpers in this file (this block previously used `s3cmd cp`)
    os.system(f"aws s3 cp {file_name} s3://{hf_url + model_dict.key}")
    # clean up the local temp copy instead of leaving it behind
    os.remove(file_name)
def main(model_identifier):
    """Fetch a model's config from S3, optionally tweak it, and push it back."""
    api = HfApi()
    # Locate the Hub entry matching this model id.
    matches = [info for info in api.model_list() if info.modelId == model_identifier]
    model_dict = matches[0]
    model_identifier = "_".join(model_identifier.split("/"))
    http = "https://s3.amazonaws.com/"
    hf_url = "models.huggingface.co/"
    config_path_aws = http + hf_url + model_dict.key
    file_name = f"./{model_identifier}_config.json"
    # Pull the raw config down with curl into a local temp file.
    os.system(f"curl {config_path_aws} > {file_name}")
    with open(file_name) as f:
        config_json = json.load(f)
    os.system(f"rm {file_name}")
    ##### HERE YOU SHOULD STATE WHICH PARAMS WILL BE CHANGED #####
    # e.g. config_json["decoder_start_token_id"] = 2
    # save config as it was saved before
    with open(file_name, "w") as f:
        json.dump(config_json, f, indent=2, sort_keys=True)
    # upload new config
    os.system(f"aws s3 cp {file_name} s3://{hf_url + model_dict.key}")
def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
    """Find models that can accept src_lang as input and return tgt_lang as output."""
    prefix = "Helsinki-NLP/opus-mt-"
    api = HfApi()
    all_ids = [info.modelId for info in api.model_list()]
    # Restrict to Helsinki-NLP translation models; models containing "+"
    # can't be loaded, so skip them.
    lang_pairs = []
    for model_id in all_ids:
        if not model_id.startswith("Helsinki-NLP"):
            continue
        if "+" in model_id:
            continue
        lang_pairs.append(remove_prefix(model_id, prefix).lower().split("-"))
    # Substring match on each side of the language pair.
    return [
        f"{prefix}{src}-{tgt}"
        for (src, tgt) in lang_pairs
        if src_lang in src and tgt_lang in tgt
    ]
def clean_all_community_configs(model_list=None, do_upload=False, do_delete=True):
    """Round-trip community model configs through AutoConfig and report drift.

    For each model on the Hub (optionally filtered to `model_list`), loads its
    config with AutoConfig, re-saves it to a local temp directory, reloads the
    saved copy, and — if the round trip changed anything — prints the author to
    notify. Optionally uploads the normalized config back to S3.

    Args:
        model_list: optional list of model ids; if given, only those are processed.
        do_upload: if True, push the re-saved config back to S3 via `aws s3 cp`.
        do_delete: if True, remove the local temp directory when done.
    """
    api = HfApi()
    model_dict_list = api.model_list()
    if model_list is not None:
        # Restrict the sweep to the requested model ids.
        model_dict_list = [model_dict for model_dict in model_dict_list if model_dict.modelId in model_list]
    for i, model_dict in enumerate(model_dict_list):
        model_identifier = model_dict.modelId
        hf_url = 'models.huggingface.co/'
        # Flatten "org/name" into "org_name" for the local filename.
        local_model_path = '_'.join(model_identifier.split('/'))
        path_to_config = "./{}_config.json".format(local_model_path)
        print("\n{}: Summary for {}".format(i, model_identifier))
        print(50 * '-')
        try:
            config = AutoConfig.from_pretrained(model_identifier)
        except Exception as e:  # noqa: E722
            # Config couldn't be loaded/parsed — report and skip this model.
            print('CONF ERROR: {} config can not be loaded'.format(model_identifier))
            print('Message: {}'.format(e))
            print(50 * '=')
            continue
        # create temp dir
        temp_dir = path_to_config.split('.json')[0]
        if os.path.exists(temp_dir):
            # Clear any stale directory from a previous run before mkdir.
            os.system('rm -r {}'.format(temp_dir))
        os.mkdir(temp_dir)
        # save config locally to only use diff
        config.save_pretrained(temp_dir)
        diff_config = AutoConfig.from_pretrained(os.path.join(temp_dir, 'config.json'))
        if config != diff_config:
            # The round trip changed the config: the stored copy differs
            # from the canonical serialized form.
            print("Author: {} needs to be notified about changed conf: {}".format(model_dict.author, model_identifier))
        if do_upload is True:
            # upload new config
            bash_command = 'aws s3 cp {} s3://{}'.format(os.path.join(temp_dir, 'config.json'), hf_url + model_dict.key)
            os.system(bash_command)
        # delete saved config
        if do_delete is True:
            os.system('rm -r {}'.format(temp_dir))
        print(50 * '=')
def change_model_list(change_fn, model_list=None, do_upload=False, key_word=None):
    """Apply `change_fn` to each selected model's config and optionally re-upload it.

    Args:
        change_fn: callable taking the config dict and returning the modified dict.
        model_list: optional list of model ids to restrict to.
        do_upload: if True, copy the edited config back to S3 via `aws s3 cp`.
        key_word: optional substring a model id must contain to be processed.
    """
    api = HfApi()
    model_dict_list = api.model_list()
    if model_list is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list
            if model_dict.modelId in model_list
        ]
    if key_word is not None:
        model_dict_list = [
            model_dict for model_dict in model_dict_list
            if key_word in model_dict.modelId
        ]
    for model_dict in model_dict_list:
        model_identifier = model_dict.modelId
        # Fix: print the id being processed, not the literal string
        # "model_identifier" as before.
        print(model_identifier)
        http = 'https://s3.amazonaws.com/'
        hf_url = 'models.huggingface.co/'
        config_path_aws = http + hf_url + model_dict.key
        model_identifier = '_'.join(model_identifier.split('/'))
        path_to_config, config_json = download(config_path_aws, model_identifier)
        config_json = change_fn(config_json)
        # save config as it was saved before
        with open(path_to_config, 'w') as f:
            json.dump(config_json, f, indent=2, sort_keys=True)
        if do_upload is True:
            # upload new config
            bash_command = 'aws s3 cp {} s3://{}'.format(
                path_to_config, hf_url + model_dict.key)
            os.system(bash_command)
        # delete saved config
        os.system('rm {}'.format(path_to_config))
def test_model_list(self):
    # Listing models on the production endpoint should yield a large,
    # properly typed collection.
    api = HfApi()
    listing = api.model_list()
    self.assertGreater(len(listing), 100)
    self.assertIsInstance(listing[0], ModelInfo)
def test_staging_model_list(self):
    # Smoke test: listing models on the staging endpoint must not raise.
    staging_api = HfApi(endpoint=ENDPOINT_STAGING)
    staging_api.model_list()
def get_all_model_paths():
    """Return the model id of every model currently listed on the Hub."""
    api = HfApi()
    return [info.modelId for info in api.model_list()]