Exemplo n.º 1
0
    def __init__(self,
                 azure_root,
                 local_lambda,
                 leave_space=0,
                 storage_credential=None,
                 subpath="."):
        '''
        Record the settings and build the three AzureStorage areas ("storage", "dibs"
        and "directory", each under azure_root/<area>/subpath) plus the PeerToPeer
        share that uses the "directory" area as its directory.

        :param azure_root: root path under which the three areas are placed.
        :param local_lambda: zero-argument callable returning a unique name
            (e.g. 172.10.12.3) and a root storage location (e.g. \\\\172.10.12.3\\scratch).
            Repeated calls are expected to return the same two values.
        :param leave_space: passed through to PeerToPeer as leave_space.
        :param storage_credential: passed through to every AzureStorage.
        :param subpath: path appended after each area name.
        '''
        super(AzureP2P, self).__init__()
        self.azure_root = azure_root
        self.subpath = subpath
        self.local_lambda = local_lambda
        self.leave_space = leave_space
        self.storage_credential = storage_credential

        def make_azure_storage(area):
            #All three areas differ only in the path component that follows azure_root.
            return AzureStorage(path_join(self.azure_root, area, subpath),
                                local_lambda=self.local_lambda,
                                storage_credential=self.storage_credential)

        def storage_lambda():
            #Same unique name as local_lambda, but with the local root extended by the "storage" area's path.
            unique_name, root = self.local_lambda()
            local_storage_root = root + path_join(self.azure_root, "storage",
                                                  subpath)
            return unique_name, local_storage_root

        #Construction order is kept: storage, then dibs, then directory.
        self.azure_storage = make_azure_storage("storage")
        self.dib_directory = make_azure_storage("dibs")
        self.file_share = PeerToPeer(
            directory=make_azure_storage("directory"),
            local_lambda=storage_lambda,
            leave_space=self.leave_space)
Exemplo n.º 2
0
    def zzztest_big_files_fileshare(self):  #!!!needs to be updated
        '''
        Endless benchmark loop: for each count in a fixed list, clear the share's
        directory, build an AzureBatch runner and log what
        _big_files_fileshare_internal returns for a 4e9-byte file.

        NOTE(review): presumably the "zzz" prefix keeps test runners from
        collecting this method -- confirm. The loop never terminates on its own.
        '''
        logging.info("test_big_files_fileshare")
        from onemil.AzureP2P import AzureP2P, ip_address_local
        from onemil.file_cache import AzureStorage, PeerToPeer

        pool_id = "d14v2_300"
        big_size = int(4e9)
        double_it = True
        update_python_path = "once"

        azure_directory = AzureStorage(
            folder="testazureshardcontainer/fileshare2",
            local_lambda=ip_address_local,
            prefix="AzureDirectory",
            storage_account=self.storage_account,
            storage_key=self.storage_key)
        storage = PeerToPeer(directory=azure_directory,
                             local_lambda=ip_address_local)

        # NOTE(review): the object built here is discarded; presumably it is
        # constructed only for a side effect of update_python_path="once" -- confirm.
        AzureBatch(task_count=1,
                   pool_id=pool_id,
                   update_python_path=update_python_path)

        while True:
            for count in (16, 4, 14, 2, 12, 10, 8):
                storage.directory.rmtree()
                task_count = count * 2 if double_it else count
                # Alternative runners used while debugging: Local(), LocalMultiThread(task_count)
                runner = AzureBatch(task_count=task_count,
                                    pool_id=pool_id,
                                    update_python_path="no")
                mbps_list = self._big_files_fileshare_internal(
                    big_size, count, runner, storage, double_it)
                logging.info("{0}\t{1}".format(count, mbps_list))
Exemplo n.º 3
0
 def storage_closure():
     '''
     Return a new PeerToPeer whose directory is an AzureStorage at
     /flstor/testazurep2p/fileshare/directory and whose local storage is
     "<fresh temp dir>/storage", identified as "<COMPUTERNAME>.<count>".
     '''
     scratch_dir = self._temp_dir()  #increments self.count, so the counter is read only afterwards
     instance_number = self.count

     def describe_local():
         #The environment is read on every call, exactly as the unique name is rebuilt on every call.
         machine = os.environ['COMPUTERNAME']
         return "{0}.{1}".format(machine, instance_number), scratch_dir + "/storage"

     def describe_directory():
         #`directory` comes from the enclosing scope and is looked up at call time.
         return None, directory

     azure_directory = AzureStorage("/flstor/testazurep2p/fileshare/directory",
                                    local_lambda=describe_directory)
     return PeerToPeer(directory=azure_directory, local_lambda=describe_local)
Exemplo n.º 4
0
 def storage_closure():
     '''
     Return a new PeerToPeer that shares the enclosing scope's `directory` and
     keeps its local files in "<fresh temp dir>/storage", identified as
     "<COMPUTERNAME>.<count>".
     '''
     scratch_dir = self._temp_dir()  #increments self.count, so the counter is read only afterwards
     instance_number = self.count

     def describe_local():
         #The environment is read on every call, exactly as the unique name is rebuilt on every call.
         machine = os.environ['COMPUTERNAME']
         return "{0}.{1}".format(machine, instance_number), scratch_dir + "/storage"

     return PeerToPeer(directory=directory, local_lambda=describe_local)
Exemplo n.º 5
0
class AzureP2P(FileCache):
    '''
    A class that subclasses :class:`FileCache` to provide peer-to-peer file sharing backed up by Azure Storage,
    for use alongside an Azure Batch account.

    **Constructor:**
        :Parameters: * **azure_root** (*string*) -- The path on Azure storage under which data will be stored. The form of the path is:
                           /STORAGEACCOUNT/CONTAINER/morepath.
                     * **local_lambda** (*a zero-argument lambda*) -- When called, tells where to store data locally. See :func:`ip_address_local`.
                     * **leave_space** (*integer*) -- (default 0) Tells the minimum amount of local storage space that should be kept free.
                     * **storage_credential** (*:class:`StorageCredential`*) -- (default '~/azurebatch/cred.txt') Keys and names of Azure Storage and AzureBatch
                           accounts to use.
                     * **subpath** (*string*) -- (default '.') Additional path to append to **azure_root**.

    **All the methods of FileCache plus these:**
    '''
    def __init__(self,
                 azure_root,
                 local_lambda,
                 leave_space=0,
                 storage_credential=None,
                 subpath="."):
        '''
        Record the settings and build the three AzureStorage areas ("storage", "dibs"
        and "directory", each under azure_root/<area>/subpath) plus the PeerToPeer
        share that uses the "directory" area as its directory.

        :param azure_root: root path under which the three areas are placed.
        :param local_lambda: zero-argument callable returning a unique name
            (e.g. 172.10.12.3) and a root storage location (e.g. \\\\172.10.12.3\\scratch).
            Repeated calls are expected to return the same two values.
        :param leave_space: passed through to PeerToPeer as leave_space.
        :param storage_credential: passed through to every AzureStorage.
        :param subpath: path appended after each area name.
        '''
        super(AzureP2P, self).__init__()
        self.azure_root = azure_root
        self.subpath = subpath
        self.local_lambda = local_lambda
        self.leave_space = leave_space
        self.storage_credential = storage_credential

        def make_azure_storage(area):
            #All three areas differ only in the path component that follows azure_root.
            return AzureStorage(path_join(self.azure_root, area, subpath),
                                local_lambda=self.local_lambda,
                                storage_credential=self.storage_credential)

        def storage_lambda():
            #Same unique name as local_lambda, but with the local root extended by the "storage" area's path.
            unique_name, root = self.local_lambda()
            local_storage_root = root + path_join(self.azure_root, "storage",
                                                  subpath)
            return unique_name, local_storage_root

        #Construction order is kept: storage, then dibs, then directory.
        self.azure_storage = make_azure_storage("storage")
        self.dib_directory = make_azure_storage("dibs")
        self.file_share = PeerToPeer(
            directory=make_azure_storage("directory"),
            local_lambda=storage_lambda,
            leave_space=self.leave_space)

    def _simple_join(self, path):
        '''
        Return a new AzureP2P with the same root and settings whose subpath is
        this instance's subpath joined with *path*.

        Asserts that *path* does not name an existing file in the AzureStorage copy.
        '''
        assert not self.azure_storage.file_exists(
            path), "Can't treat an existing file as a directory"
        inherited_settings = dict(
            local_lambda=self.local_lambda,
            leave_space=self.leave_space,
            storage_credential=self.storage_credential,
            subpath=path_join(self.subpath, path))
        return AzureP2P(self.azure_root, **inherited_settings)

    def __repr__(self):
        '''
        Show the class name followed by the quoted :attr:`name`.
        '''
        class_name = self.__class__.__name__
        shown_name = self.name
        return "{0}('{1}')".format(class_name, shown_name)

    @property
    def name(self):
        '''
        The azure_root given to the constructor. The subpath is not included.
        '''
        return self.azure_root

    def _simple_file_exists(self, simple_file_name):
        '''
        Tell whether the file exists according to the AzureStorage copy.
        The PeerToPeer share is not consulted.
        '''
        return self.azure_storage._simple_file_exists(simple_file_name)

    @contextmanager
    def _simple_open_write(self, simple_file_name, size=0, updater=None):
        '''
        Open a file for writing through both the AzureStorage copy and the PeerToPeer share.

        Yields the local file name to write to. Both sub-stores must agree on that name.
        Any stale PeerToPeer entry for the file is removed (with a warning) before the
        PeerToPeer write is opened.

        The two sub-handles are held with nested ``with`` statements, so they are
        exited even when the caller's body raises, and they see that exception
        instead of being told that the write succeeded.

        :param simple_file_name: name of the file within this store.
        :param size: passed through to both sub-stores' open_write.
        :param updater: passed through to both sub-stores' open_write.
        '''
        #AzureStorage is entered first so that it is exited last: its exit sets the file date.
        with self.azure_storage.open_write(
                simple_file_name, size=size,
                updater=updater) as subhandle_as_file_name:

            if self.file_share._simple_file_exists(simple_file_name):
                logging.warning(
                    "The AzureStorage doesn't already have the file that is being written, but the PeerToPeer does, so removing it from the PeerToPeer. {0},'{1}'"
                    .format(self.file_share, simple_file_name))
                self.file_share._simple_remove(simple_file_name)

            with self.file_share.open_write(
                    simple_file_name, size=size,
                    updater=updater) as subhandle_fs_file_name:

                assert os.path.normpath(
                    subhandle_fs_file_name
                ) == os.path.normpath(
                    subhandle_as_file_name
                ), "Expect that the two ways of distributing files to agree on the local file name"

                yield subhandle_fs_file_name

    @contextmanager
    def _simple_open_read(self, simple_file_name, updater=None):
        '''
        Open a file for reading, preferring the PeerToPeer share and falling back to AzureStorage.

        Yields the local file name to read from. The steps are:

        1. Try the PeerToPeer share. If that works, use it.
        2. Otherwise take a dib on the file and wait for a turn.
           * status 'fixed': read from the PeerToPeer share.
           * status 'azure': try the PeerToPeer share once more. If that still fails,
             download from AzureStorage and register the local copy with the PeerToPeer share.

        Whichever handle is used is exited even when the caller's body raises (and is
        given that exception). Once a turn has been obtained, this reader's dibs are
        always removed, whether the read succeeded or not.

        :param simple_file_name: name of the file within this store.
        :param updater: accepted for interface compatibility; not used here.
        :raises Exception: if the dib status is neither 'fixed' nor 'azure'.
        '''
        import sys

        @contextmanager
        def entered(handle, file_name):
            #For a handle whose __enter__ has already been called: yield its file name and
            #guarantee that __exit__ runs, passing along any exception raised by the reader.
            exc_info = (None, None, None)
            try:
                yield file_name
            except BaseException:
                exc_info = sys.exc_info()
                raise
            finally:
                handle.__exit__(*exc_info)

        # We assume that we could use either sub-storage and that the only problem
        # that could happen is that the file_share's remote machine containing the "main"
        # file share copy could have been recycled and thus the file would be missing.

        #Only opening/entering is inside the try. A failure in the caller's body must not trigger the fallback.
        is_ok = False
        try:
            subhandle1 = self.file_share._simple_open_read(
                simple_file_name
            )  #!!!should file share be self-repairing. If the "main" is gone, pick one of the others
            subhandle1_file_name = subhandle1.__enter__()
            is_ok = True
        except Exception as e:
            logging.info(
                "AzureP2P - machine-to-machine copy of '{0}' failed, so reading from AzureStorage. Exception='{1}'"
                .format(simple_file_name, e))
        if is_ok:
            with entered(subhandle1, subhandle1_file_name) as file_name1:
                yield file_name1
            return

        #We are now in a situation in which multiple readers have failed to find the file in the PeerToPeer. We would like one of them to
        #download from AzureStorage, while the others wait. When that one has finished, the others can then get it from PeerToPeer.

        #Dib the file and see if you are 1st.
        # If so, double check that PeerToPeer isn't there now ((in case someone fixed everything already) if it is, use it) otherwise download and share.
        # If not wait until the first dib is gone, then clear your dib and use the file share.

        unique_name = self.local_lambda()[0]
        dib_path = self.dib_directory.join(simple_file_name)
        dir_path = self.file_share.directory.join(simple_file_name)
        dib_lib = DibLib(unique_name, dib_path, dir_path, "azure_storage_dib")
        status = dib_lib.wait_for_turn()
        logging.info("status is '{0}'".format(status))

        if status == 'fixed':
            try:
                logging.info(
                    "After waiting for someone else to fix the problem, can now read the file with PeerToPeer"
                )
                with self.file_share._simple_open_read(
                        simple_file_name
                ) as fixed_file_name:  #!!!should file share be self-repairing. If the "main" is gone, pick one of the others
                    yield fixed_file_name
            finally:
                #Always release the dib so that a failure here does not leave it behind.
                dib_lib.remove_dibs()
            return

        if status == 'azure':
            try:
                is_ok = False
                try:
                    logging.info(
                        "Before I try azure, let's try file_share again")
                    read_handle = self.file_share._simple_open_read(
                        simple_file_name
                    )  #!!! should file share be self-repairing. If the "main" is gone, pick one of the others
                    file_name2 = read_handle.__enter__()
                    is_ok = True
                except Exception as e:
                    #Format the exception itself: `e.message` does not exist on Python 3 exceptions.
                    logging.info(
                        "2nd try of reading from PeerToPeer failed with message '{0}'"
                        .format(e))
                if is_ok:
                    with entered(read_handle, file_name2) as retry_file_name:
                        yield retry_file_name
                    return

                logging.info("downloading from Azure")
                with self.azure_storage._simple_open_read(
                        simple_file_name) as file_name3:
                    self._simple_register_with_peer_to_peer(
                        simple_file_name, file_name3)
                    yield file_name3
            finally:
                #Always release the dib so that a failure here does not leave it behind.
                dib_lib.remove_dibs()
            return

        raise Exception("Don't know status '{0}'".format(status))

    def _simple_register_with_peer_to_peer(self, simple_file_name,
                                           local_file_name):
        '''
        Replace any PeerToPeer entry for a file with a new one that refers to a copy already on local disk.

        The local file is moved aside under a temporary name while the old entry is
        removed, then moved back inside the PeerToPeer write so the write's exit sees
        the file in place.

        :param simple_file_name: name of the file within this store.
        :param local_file_name: path of the existing local copy. It must be the same
            path that the PeerToPeer share chooses for this file (asserted).
        '''
        #Removes any current peer-to-peer entry and adds a new one
        #The temporary name is derived from a hash of the current process times.
        temp_name = local_file_name + "." + format(hash(os.times())) + ".temp"
        os.rename(local_file_name,
                  temp_name)  #Rename so that this remove doesn't remove it
        if self.file_share._simple_file_exists(simple_file_name):
            self.file_share._simple_remove(simple_file_name)
        # NOTE(review): if anything between the two renames raises, the file stays under temp_name -- confirm whether callers rely on that.
        with self.file_share._simple_open_write(
                simple_file_name, size=0
        ) as file_name:  #size 0 because we already have space allocated.
            os.rename(temp_name, local_file_name)  #Put file back
            assert os.path.normpath(local_file_name) == os.path.normpath(
                file_name
            ), "Expect that the two ways of distributing files to agree on the local file name"

    def _simple_getmtime(self, simple_file_name):
        '''
        Return the file's modification time as reported by the AzureStorage copy.
        The PeerToPeer share is not consulted.
        '''
        return self.azure_storage._simple_getmtime(simple_file_name)

    def _simple_rmtree(self, log_writer=None):
        '''
        Remove everything in this store.

        Fast path: when the PeerToPeer directory is an AzureStorage and the directory,
        dibs and storage folders all have the same parent folder, that parent is removed
        in one call on the storage's shard container.
        Otherwise the PeerToPeer share, the AzureStorage copy and the dib directory are
        cleared one after the other.

        :param log_writer: optional callable given a progress message. Also passed to the sub-stores.
        '''
        #Why the fast path could be a bad idea: What if the azure_shard_container's are different or if there is another directory there of interest
        #Why it could be a great idea: If this is top-level Azure Storage container, deleting will take a second instead of an hour.
        share_directory = self.file_share.directory
        if isinstance(share_directory, AzureStorage):
            #Parent folder = everything before the last '/' ('' when there is no '/').
            parents = [
                store.folder.rpartition('/')[0]
                for store in (share_directory, self.dib_directory,
                              self.azure_storage)
            ]
            if parents[0] == parents[1] == parents[2]:
                common_parent = parents[0]
                if log_writer is not None:
                    log_writer("Fast rmtreeing '{0}'".format(common_parent))
                self.azure_storage.azure_shard_container.rmtree(common_parent)
                return

        for store in (self.file_share, self.azure_storage,
                      self.dib_directory):
            store._simple_rmtree(log_writer=log_writer)

    def _simple_remove(self, simple_file_name, log_writer=None):
        '''
        Remove a file: first from the PeerToPeer share (only if it is listed there),
        then from the AzureStorage copy.

        :param simple_file_name: name of the file within this store.
        :param log_writer: passed through to the AzureStorage removal only.
        '''
        peer_share = self.file_share
        listed_in_share = peer_share._simple_file_exists(simple_file_name)
        if listed_in_share:
            peer_share._simple_remove(simple_file_name)
        self.azure_storage._simple_remove(simple_file_name,
                                          log_writer=log_writer)

    def _simple_walk(self):
        '''
        Walk the files as listed by the AzureStorage copy.
        The PeerToPeer share is not consulted.
        '''
        return self.azure_storage._simple_walk()

    def azure_storage_only(self, path=None, log_writer=None):
        '''
        Remove everything except the AzureStorage copy of the file system.

        The whole dib directory is cleared regardless of *path*. Local copies and
        PeerToPeer directory entries are removed for *path*.

        :param path: passed to the PeerToPeer share's local-removal and directory rmtree calls.
        :param log_writer: passed to the PeerToPeer directory's rmtree only.
        '''
        peer_share = self.file_share
        self.dib_directory.rmtree()  #Everything in the dib directory goes, whatever the path
        peer_share._remove_local_if_any(path)
        peer_share.directory.rmtree(path, log_writer=log_writer)

    def remove_from_azure_storage(self, path=None, log_writer=None):
        '''
        Remove everything from AzureStorage (local copies will remain, but will be ignored)

        :param path: when None the whole store is rmtree'd. Otherwise only that path
            is removed from AzureStorage, and only if it exists there as a file.
        :param log_writer: passed through to the rmtree and clean-up calls.
        '''
        if path is not None:
            if self.azure_storage.file_exists(path):
                self.azure_storage.remove(path)
        else:
            #This will remove azure_storage and directory (and the dibs, again)
            self.rmtree(log_writer=log_writer)

        #This will remove any local files (and the dibs, again)
        self.azure_storage_only(path, log_writer=log_writer)