def upload_dict(model: str, files_mapping: Dict[str, str], username: str = HUGGINGFACE_USERNAME):
    """
    Upload an explicit set of local files to a huggingface repository.

    Make sure you are already logged in using the CLI,
    ```
    huggingface-cli login
    ```

    Parameters
    ----------
    model: str
        it will become repository name.
    files_mapping: Dict[str, str]
        {local_file: target_file}
    username: str, optional (default=os.environ.get('HUGGINGFACE_USERNAME', 'huseinzol05'))
        prefix joined with `model` to build the repository id `{username}/{model}`.
    """
    repo_id = f'{username}/{model}'

    # Repository creation is best-effort: any failure is logged as a warning
    # and the uploads below are still attempted.
    try:
        create_repo(name=model)
    except Exception as e:
        logger.warning(e)

    for local_path, remote_path in files_mapping.items():
        upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=repo_id,
        )
        logger.info(f'Uploaded from {local_path} to {repo_id}/{remote_path}')
def upload(model: str, directory: str, username: str = HUGGINGFACE_USERNAME):
    """
    Upload the files of a local directory to a huggingface repository.

    Make sure you are already logged in using the CLI,
    ```
    huggingface-cli login
    ```

    Only entries directly inside `directory` that match the `*` glob pattern
    are considered (no recursion). Entries that are not regular files, such as
    sub-directories, are skipped with a warning instead of aborting the whole
    upload. Each file is uploaded under its own base name.

    Parameters
    ----------
    model: str
        it will become repository name.
    directory: str
        local directory with files in it.
    username: str, optional (default=os.environ.get('HUGGINGFACE_USERNAME', 'huseinzol05'))
        prefix joined with `model` to build the repository id `{username}/{model}`.
    """
    # Repository creation is best-effort: any failure is logged as a warning
    # and the uploads below are still attempted.
    try:
        create_repo(name=model)
    except Exception as e:
        logger.warning(e)

    repo_id = f'{username}/{model}'

    # Sorted so the upload order is deterministic across runs and platforms.
    for file in sorted(glob(os.path.join(directory, '*'))):
        # `*` also matches sub-directories, which cannot be uploaded as a
        # single file; skip them rather than failing halfway through.
        if not os.path.isfile(file):
            logger.warning(f'Skipping {file}, it is not a regular file')
            continue

        file_remote = os.path.basename(file)
        # Logged before the transfer so the message matches its tense and
        # still appears if the upload itself raises.
        logger.info(f'Uploading from {file} to {repo_id}/{file_remote}')
        upload_file(path_or_fileobj=file, path_in_repo=file_remote, repo_id=repo_id)
def _get_repo_url_from_name(
    repo_name: str,
    organization: Optional[str] = None,
    private: bool = None,
    use_auth_token: Optional[Union[bool, str]] = None,
) -> str:
    """
    Resolve an authentication token and create the repository if needed.

    Args:
        repo_name: name forwarded to `create_repo`.
        organization: optional organization forwarded to `create_repo`.
        private: privacy flag forwarded to `create_repo`.
        use_auth_token: a string is used directly as the token; any other
            truthy value loads the token via `HfFolder.get_token()`; a falsy
            value means no token is sent.

    Returns:
        The value returned by `create_repo` (called with `exist_ok=True`).

    Raises:
        ValueError: if a stored token was requested but `HfFolder.get_token()`
            returned `None`.
    """
    if isinstance(use_auth_token, str):
        # Caller supplied the token itself.
        token = use_auth_token
    elif not use_auth_token:
        token = None
    else:
        # Truthy non-string: fall back to the locally stored token.
        token = HfFolder.get_token()
        if token is None:
            raise ValueError(
                "You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and "
                "entering your credentials to use `use_auth_token=True`. Alternatively, you can pass your own "
                "token as the `use_auth_token` argument.")

    repo_url = create_repo(
        token,
        repo_name,
        organization=organization,
        private=private,
        repo_type=None,
        exist_ok=True,
    )
    return repo_url
def setup(self, components: List[Component], flagging_dir: str):
    """
    Create (if needed) and clone the dataset repository, then record the
    local paths used by this saver.

    Params:
    components (List[Component]): stored on the instance as `self.components`.
    flagging_dir (str): local directory where the dataset is cloned, updated, and pushed from.
    """
    try:
        import huggingface_hub
    except ImportError:  # ModuleNotFoundError is a subclass of ImportError
        raise ImportError(
            "Package `huggingface_hub` not found is needed "
            "for HuggingFaceDatasetSaver. Try 'pip install huggingface_hub'."
        )

    # `exist_ok=True` is passed so an already existing repository is accepted.
    repo_url = huggingface_hub.create_repo(
        name=self.dataset_name,
        token=self.hf_foken,
        private=self.dataset_private,
        repo_type="dataset",
        exist_ok=True,
    )
    # e.g. "https://huggingface.co/datasets/abidlabs/test-audio-10"
    self.path_to_dataset_repo = repo_url

    self.components = components
    self.flagging_dir = flagging_dir
    self.dataset_dir = os.path.join(flagging_dir, self.dataset_name)

    # Clone the repository into the local dataset directory and pull.
    self.repo = huggingface_hub.Repository(
        local_dir=self.dataset_dir,
        clone_from=repo_url,
        use_auth_token=self.hf_foken,
    )
    self.repo.git_pull()

    # Should filename be user-specified?
    self.log_file = os.path.join(self.dataset_dir, "data.csv")
    self.infos_file = os.path.join(self.dataset_dir, "dataset_infos.json")
def _create_repo(
    self,
    repo_id: str,
    private: Optional[bool] = None,
    use_auth_token: Optional[Union[bool, str]] = None,
    repo_url: Optional[str] = None,
    organization: Optional[str] = None,
):
    """
    Create the repo if needed, clean up `repo_id` using the deprecated kwargs `repo_url` and `organization`,
    and retrieve the token.

    Args:
        repo_id: repository id, with or without a namespace prefix.
        private: privacy flag forwarded to `create_repo`.
        use_auth_token: `True` loads the token via `HfFolder.get_token()`;
            any other value is used as the token as-is.
        repo_url: deprecated. When given, `repo_id` is derived from it by
            removing the `HUGGINGFACE_CO_RESOLVE_ENDPOINT` prefix.
        organization: deprecated. When given, `repo_id` is rewritten to
            `{organization}/{name}` unless it already has that namespace.

    Returns:
        A `(repo_id, token)` tuple.
    """
    if repo_url is not None:
        warnings.warn(
            "The `repo_url` argument is deprecated and will be removed in v5 of Transformers. Use `repo_id` "
            "instead.")
        repo_id = repo_url.replace(f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/", "")

    if organization is not None:
        warnings.warn(
            "The `organization` argument is deprecated and will be removed in v5 of Transformers. Set your "
            "organization directly in the `repo_id` passed instead (`repo_id={organization}/{model_id}`)."
        )
        # Match on "<organization>/" rather than the bare name: a plain prefix
        # check would treat "foobar/model" (or a repo literally named "foo")
        # as already belonging to organization "foo".
        if not repo_id.startswith(f"{organization}/"):
            if "/" in repo_id:
                # Drop whatever other namespace was present.
                repo_id = repo_id.split("/")[-1]
            repo_id = f"{organization}/{repo_id}"

    token = HfFolder.get_token() if use_auth_token is True else use_auth_token
    url = create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True)

    # If the namespace is not there, add it or `upload_file` will complain
    if "/" not in repo_id and url != f"{HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{repo_id}":
        repo_id = get_full_repo_name(repo_id, token=token)
    return repo_id, token