def test_atomic_write(tmpdir):
    fname = tmpdir.join('ha')
    for i in range(2):
        with atomic_write(str(fname), overwrite=True) as f:
            f.write('hoho')

    with pytest.raises(OSError) as excinfo:
        with atomic_write(str(fname), overwrite=False) as f:
            f.write('haha')

    assert excinfo.value.errno == errno.EEXIST

    assert fname.read() == 'hoho'
    assert len(tmpdir.listdir()) == 1
def test_teardown(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(AssertionError):
        with atomic_write(str(fname), overwrite=True) as f:
            assert False

    assert not tmpdir.listdir()
    def export_feature(self, feature):

        abs_path = self.feature2abspath(feature)

        with atomicwrites.atomic_write(abs_path, overwrite=True) as fh:
            self.encoder.encode_feature(feature, fh)

        return abs_path
def test_replace_simultaneously_created_file(tmpdir):
    fname = tmpdir.join('ha')
    with atomic_write(str(fname), overwrite=True) as f:
        f.write('hoho')
        fname.write('harhar')
        assert fname.read() == 'harhar'
    assert fname.read() == 'hoho'
    assert len(tmpdir.listdir()) == 1
def test_atomic_write_in_pwd(tmpdir):
    orig_curdir = os.getcwd()
    try:
        os.chdir(str(tmpdir))
        fname = 'ha'
        for i in range(2):
            with atomic_write(str(fname), overwrite=True) as f:
                f.write('hoho')

        with pytest.raises(OSError) as excinfo:
            with atomic_write(str(fname), overwrite=False) as f:
                f.write('haha')

        assert excinfo.value.errno == errno.EEXIST

        assert open(fname).read() == 'hoho'
        assert len(tmpdir.listdir()) == 1
    finally:
        os.chdir(orig_curdir)
def test_open_reraise(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(AssertionError):
        with atomic_write(str(fname), overwrite=False) as f:
            # Mess with f, so rollback will trigger an OSError. We're testing
            # that the initial AssertionError triggered below is propagated up
            # the stack, not the second exception triggered during rollback.
            f.name = "asdf"
            # Now trigger our own exception.
            assert False, "Intentional failure for testing purposes"
def test_dont_remove_simultaneously_created_file(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(OSError) as excinfo:
        with atomic_write(str(fname), overwrite=False) as f:
            f.write('hoho')
            fname.write('harhar')
            assert fname.read() == 'harhar'

    assert excinfo.value.errno == errno.EEXIST
    assert fname.read() == 'harhar'
    assert len(tmpdir.listdir()) == 1
Example #8
 def _upload_impl(self, item, href):
     fpath = self._get_filepath(href)
     try:
         with atomic_write(fpath, mode='wb', overwrite=False) as f:
             f.write(item.raw.encode(self.encoding))
             return fpath, get_etag_from_file(f)
     except OSError as e:
         if e.errno == errno.EEXIST:
             raise exceptions.AlreadyExistingError(existing_href=href)
         else:
             raise
Example #9
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    prepare_status_path(path)

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)

    os.chmod(path, STATUS_PERMISSIONS)
Example #10
 def _write(self):
     if self._last_mtime is not None and self._last_mtime != os.path.getmtime(self.path):
         raise exceptions.PreconditionFailed(
             "Some other program modified the file {!r}. Re-run the "
             "synchronization and make sure absolutely no other program is "
             "writing into the same file.".format(self.path)
         )
     text = join_collection((item.raw for item, etag in itervalues(self._items)))
     try:
         with atomic_write(self.path, mode="wb", overwrite=True) as f:
             f.write(text.encode(self.encoding))
     finally:
         self._items = None
         self._last_mtime = None
Example #11
def write(text, filename, encoding='utf-8', mode='wb'):
    """
    Write 'text' to the file 'filename' using 'encoding', atomically unless appending
    Return the (possibly new) encoding
    """
    text, encoding = encode(text, encoding)
    if 'a' in mode:
        with open(filename, mode) as textfile:
            textfile.write(text)
    else:
        with atomic_write(filename,
                          overwrite=True,
                          mode=mode) as textfile:
            textfile.write(text)
    return encoding
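
A minimal usage sketch for the write() helper above (the path and text are illustrative; encode() is assumed to be the module's own helper returning a (bytes, encoding) pair, as the code implies):

# Atomically replaces /tmp/notes.txt; an append mode such as 'ab' would bypass
# atomic_write and use a plain open() instead.
used_encoding = write('hello world\n', '/tmp/notes.txt', encoding='utf-8')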
Example #12
File: vdir.py Project: pdav/khal
    def update(self, href, item, etag):
        fpath = self._get_filepath(href)
        if not os.path.exists(fpath):
            raise NotFoundError(item.uid)
        actual_etag = get_etag_from_file(fpath)
        if etag != actual_etag:
            raise WrongEtagError(etag, actual_etag)

        if not isinstance(item.raw, str):
            raise TypeError('item.raw must be a unicode string.')

        with atomic_write(fpath, mode='wb', overwrite=True) as f:
            f.write(item.raw.encode(self.encoding))
            etag = get_etag_from_file(f)

        return etag
Example #13
 def _write(self):
     if self._last_etag is not None and \
        self._last_etag != get_etag_from_file(self.path):
         raise exceptions.PreconditionFailed(
             'Some other program modified the file {!r}. Re-run the '
             'synchronization and make sure absolutely no other program is '
             'writing into the same file.'.format(self.path))
     text = join_collection(
         item.raw for item, etag in self._items.values()
     )
     try:
         with atomic_write(self.path, mode='wb', overwrite=True) as f:
             f.write(text.encode(self.encoding))
     finally:
         self._items = None
         self._last_etag = None
Example #14
 def write(self, create=False):
     if self.filename is None:
         if not create:
             raise ValueError('Create arg must be true '
                              'if filename is None.')
         self.random_filename()
         if self.filepath is None:
             raise ValueError('basepath and calendar must be set.')
     calendar_path = os.path.join(self.basepath, self.calendar)
     if not os.path.exists(calendar_path):
         raise CliError('Calendars are not explicitly created. '
                        'Please create the directory {} yourself.'
                        .format(calendar_path))
     with atomic_write(self.filepath, mode='wb', overwrite=not create) as f:
         f.write(self.vcal.to_ical())
     while self._old_filepaths:
         os.remove(self._old_filepaths.pop())
Example #15
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    dirname = os.path.dirname(path)

    try:
        os.makedirs(dirname, STATUS_DIR_PERMISSIONS)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)

    os.chmod(path, STATUS_PERMISSIONS)
Example #16
def _download_sqlite(db_path):
    logging.info('Grabbing file-copy lock')
    if FILE_COPY_LOCK.acquire():
        # Check it again, now that we've finally gotten the lock
        logging.info('Grabbed lock, checking if file exists: %s', os.path.exists(db_path))
        if not os.path.exists(db_path):
            # TODO: ensure we copy to a staging ground, and move it into place (so no one tries to open a malformed file)
            client = storage.Client('dancedeets-hrd')
            bucket = client.get_bucket('dancedeets-dependencies')
            logging.info('Downloading file')
            blob = bucket.get_blob(os.path.basename(db_path))
            # Luckily this only takes around 5-10 seconds (for street dance) when run on GCE instances
            contents = blob.download_as_string()
            with atomic_write(db_path, overwrite=True) as f:
                f.write(contents)
                # db_path doesn't exist yet.
            # Now it does.
        FILE_COPY_LOCK.release()
Example #17
    def update(self, href, item, etag):
        fpath = self._get_filepath(href)
        if not os.path.exists(fpath):
            raise exceptions.NotFoundError(item.uid)
        actual_etag = get_etag_from_file(fpath)
        if etag != actual_etag:
            raise exceptions.WrongEtagError(etag, actual_etag)

        if not isinstance(item.raw, text_type):
            raise TypeError('item.raw must be a unicode string.')

        with atomic_write(fpath, mode='wb', overwrite=True) as f:
            f.write(item.raw.encode(self.encoding))
            etag = get_etag_from_fileobject(f)

        if self.post_hook:
            self._run_post_hook(fpath)
        return etag
Example #18
def _write_pyc(state, co, source_stat, pyc):
    # Technically, we don't have to have the same pyc format as
    # (C)Python, since these "pycs" should never be seen by builtin
    # import. However, there's little reason to deviate, and I hope
    # sometime to be able to use imp.load_compiled to load them. (See
    # the comment in load_module above.)
    try:
        with atomicwrites.atomic_write(pyc, mode="wb", overwrite=True) as fp:
            fp.write(imp.get_magic())
            mtime = int(source_stat.mtime)
            size = source_stat.size & 0xFFFFFFFF
            fp.write(struct.pack("<ll", mtime, size))
            fp.write(marshal.dumps(co))
    except EnvironmentError as e:
        state.trace("error writing pyc file at %s: errno=%s" % (pyc, e.errno))
        # we ignore any failure to write the cache file
        # there are many reasons, permission-denied, __pycache__ being a
        # file etc.
        return False
    return True
Example #19
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    base_path = os.path.dirname(path)

    if collection is not None and os.path.isfile(base_path):
        raise CliError('{0} is probably a legacy file and could be removed '
                       'automatically, but this choice is left to the '
                       'user. If you think this is an error, please file '
                       'a bug at {1}'.format(base_path, PROJECT_HOME))

    try:
        os.makedirs(base_path, 0o750)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)
Example #20
    async def apply_config(
        self,
        interfaces: Dict[str, InterfaceModel],
        nameservers: List[str],
        routes: List[RouteModel],
    ) -> None:
        logger.debug("In Network.apply_config()")
        self._is_busy = True
        sysconfig = Path("/etc/sysconfig/network")

        bonding_slave_interfaces: List[str] = []
        for iface in interfaces:
            conf = sysconfig.joinpath(f"ifcfg-{iface}")
            new_config = interfaces[iface].config
            if new_config is None:
                # interface with no config, so get rid of it
                conf.unlink(missing_ok=True)
                continue
            # This is destructive of any existing config
            # (i.e. we're not merging here, we're clobbering)
            logger.info(f"Writing {conf}")
            with atomic_write(conf, overwrite=True) as f:
                f.write(f"STARTMODE='auto'\n"
                        f"BOOTPROTO='{new_config.bootproto}'\n"
                        f"IPADDR='{new_config.ipaddr}'\n")
                if new_config.bonding_slaves:
                    f.write("BONDING_MASTER='yes'\n")
                    for i, s in enumerate(new_config.bonding_slaves):
                        f.write(f"BONDING_SLAVE{i}='{s}'\n")
                        bonding_slave_interfaces.append(s)
                    f.write(
                        "BONDING_MODULE_OPTS='mode=active-backup miimon=100'\n"
                    )
        # special case for bonding slave interfaces, if any, which need to be
        # overwritten with STARTMODE=hotplug, BOOTPROTO=none
        for iface in bonding_slave_interfaces:
            conf = sysconfig.joinpath(f"ifcfg-{iface}")
            logger.info(f"Writing {conf}")
            with atomic_write(conf, overwrite=True) as f:
                f.write("STARTMODE='hotplug'\nBOOTPROTO='none'\n")
        self._refresh_interfaces()

        # Write nameserver config
        self._update_config(
            Path("/etc/sysconfig/network/config"),
            {"NETCONFIG_DNS_STATIC_SERVERS": " ".join(nameservers)},
        )
        self._refresh_nameservers()

        # Write route config
        for iface in self._interfaces:
            self._update_route_file(
                Path(f"/etc/sysconfig/network/ifroute-{iface}"),
                iface,
                [route for route in routes if route.interface == iface],
            )
        self._update_route_file(
            Path("/etc/sysconfig/network/routes"),
            "-",
            [route for route in routes if route.interface == "-"],
        )
        self._refresh_routes()

        logger.info("Restarting network services...")
        ret, _, err = await aqr_run_cmd(
            ["systemctl", "restart", "network.service"])
        # TODO: can this fail?
        logger.info("Restarted network services")
        self._is_busy = False
Example #21
def update_metafile(source_meta, dest_meta, updated, **kwargs):

    features = {}

    for path in updated:

        path = os.path.abspath(path)

        # See what's going on here - it is something between not
        # awesome and wildly inefficient. It is possible and likely
        # and just generally more better that we can and will
        # simply parse the filename and extract the ID accordingly.
        # The thing is that we need (want) to pass a file to the
        # dump_row function in order to generate a file_hash which
        # a bunch of other services use for detecting changes.
        # So, in the meantime this is what we're doing...
        # (20151111/thisisaaronland)

        feature = mapzen.whosonfirst.utils.load_file(path)

        props = feature['properties']
        wofid = props['wof:id']

        features[wofid] = path

    source_fh = open(source_meta, 'r')
    reader = csv.DictReader(source_fh)

    writer = None

    with atomicwrites.atomic_write(dest_meta, mode='wb',
                                   overwrite=True) as dest_fh:

        for row in reader:

            id = row['id']
            id = int(id)

            if features.get(id, False):

                logging.debug("update row for %s in %s" % (id, dest_meta))

                path = features[id]
                row = mapzen.whosonfirst.meta.dump_file(path, **kwargs)

                del (features[id])

            if not writer:
                fn = fieldnames()
                writer = csv.DictWriter(dest_fh, fieldnames=fn)
                writer.writeheader()

            writer.writerow(row)

        for wofid, path in features.items():

            row = mapzen.whosonfirst.meta.dump_file(path, **kwargs)
            writer.writerow(row)

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-meta/issues/2

    perms = kwargs.get('perms', 0o644)

    if perms != None:
        os.chmod(dest_meta, perms)
Example #22
def save_settings(settings):
    with atomic_write(SETTINGS_FILE, overwrite=True) as f:
        f.write(json.dumps(settings, indent=4))
Example #23
    def _download_file(self, url, filename, compress=False, timeout=30):
        """ Download a file to the resources folder.

        Download data from `url`, save as `filename`, and optionally compress with gzip.

        Parameters
        ----------
        url : str
            URL to download data from
        filename : str
            name of file to save; if compress, ensure '.gz' is appended
        compress : bool
            compress with gzip
        timeout : int
            seconds for timeout of download request

        Returns
        -------
        str
            path to downloaded file, empty str if error
        """
        if compress and filename[-3:] != ".gz":
            filename += ".gz"

        destination = os.path.join(self._resources_dir, filename)

        if not create_dir(os.path.dirname(destination)):
            return ""

        if not os.path.exists(destination):
            try:
                # get file if it hasn't already been downloaded
                # http://stackoverflow.com/a/7244263
                with urllib.request.urlopen(
                        url, timeout=timeout) as response, atomic_write(
                            destination, mode="wb", overwrite=True) as f:
                    self._print_download_msg(destination)
                    data = response.read()  # a `bytes` object

                    if compress:
                        self._write_data_to_gzip(f, data)
                    else:
                        f.write(data)
            except urllib.error.URLError as err:
                logger.warning(err)
                destination = ""
                # try HTTP if an FTP error occurred
                if "ftp://" in url:
                    destination = self._download_file(
                        url.replace("ftp://", "http://"),
                        filename,
                        compress=compress,
                        timeout=timeout,
                    )
            except socket.timeout:
                logger.warning(f"Timeout downloading {url}")
                destination = ""
            except FileExistsError:
                # if the file exists, another process has created it while it was
                # being downloaded
                # in such a case, the other copy is identical, so ignore this error
                pass

        return destination
Example #24
    def cacheData(self,
                  keyname,
                  online_path_list,
                  username=None,
                  password=None,
                  authentication_url=None,
                  cookiejar=None,
                  use_requests=False,
                  use_progress_bar=True):
        '''
        Download and store specified data to local disk

        @param keyname: Name of dataset in configuration file
        @param online_path_list: List of urls to data
        @param username: Username for accessing online resources
        @param password: Password for accessing online resources
        @param authentication_url: The url used for authentication (unused when use_requests=True)
        @param cookiejar: The cookiejar that stores credentials (unused when use_requests=True)
        @param use_requests: Use the requests library instead of the standard library for accessing resources
        @param use_progress_bar: Use a progress bar to show number of items downloaded

        @return List of downloaded file locations
        '''
        def parseURL(data_location, in_path):
            '''
            This function takes the file path of saved data and determines
            what url created it.

            @param data_location: Absolute path to root directory whose path is not part of the url
            @param path: Path to object that will be used to generate a url

            @return ParseResult of url generated from in_path
            '''
            data_location_parts = len(pathlib.Path(data_location).parts[:])
            path = pathlib.Path(in_path)
            access_type = path.parts[data_location_parts]
            if access_type != 'file':
                access_type += '://'
            else:
                access_type += ':///'

            url_path = pathlib.Path(*path.parts[data_location_parts +
                                                1:]).as_posix()
            return parse.urlparse(access_type + url_path)

        def generatePath(data_location, parsed_url):
            '''
            This function takes a parsed url (ParseResult) and
            generates the filepath to where the data should be stored

            @param data_location: Location where data is stored
            @param parsed_url: ParseResult generated from url

            @return Local path to file
            '''

            if parsed_url.query == '':
                return os.path.join(data_location, parsed_url.scheme,
                                    parsed_url.netloc, parsed_url.path[1:])

            else:
                return os.path.join(
                    data_location, parsed_url.scheme, parsed_url.netloc,
                    parsed_url.path[1:] + '?' + parsed_url.query)
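        # Illustrative round trip of the two helpers above (values hypothetical):
        # generatePath(data_location, parse.urlparse('http://example.com/a/b.h5'))
        # yields <data_location>/http/example.com/a/b.h5, and parseURL() maps
        # that local path back to the original URL.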

        # Get absolute path to data directory
        data_location = DataFetcherCache.getDataLocation(keyname)

        # If it doesn't exist, create a new one
        if data_location == None:
            data_location = os.path.join(os.path.expanduser('~'), '.skdaccess',
                                         keyname)
            os.makedirs(data_location, exist_ok=True)
            DataFetcherCache.setDataLocation(keyname, data_location)

        # Get currently downloaded files
        downloaded_full_file_paths = [
            filename for filename in glob(os.path.join(data_location, '**'),
                                          recursive=True)
            if os.path.isfile(filename)
        ]
        # Remove empty files
        downloaded_full_file_paths = [
            filename for filename in downloaded_full_file_paths
            if self.checkIfDataExists(filename)
        ]
        # Convert filenames to urls
        downloaded_parsed_urls = set(
            parseURL(data_location, file_path)
            for file_path in downloaded_full_file_paths)

        # Determine which files are missing
        parsed_http_paths = [
            parse.urlparse(online_path) for online_path in online_path_list
        ]
        missing_files = list(
            set(parsed_http_paths).difference(downloaded_parsed_urls))

        missing_files.sort()

        # Download missing files
        if len(missing_files) > 0:
            # Sanity check on input options
            if use_requests == True and authentication_url != None:
                raise ValueError(
                    'Cannot use an authentication url with requests')

            # Setup connection (non requests)
            if not use_requests:
                # Deal with password protected urls
                # This method comes from
                # https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
                if username != None or password != None:
                    password_manager = HTTPPasswordMgrWithDefaultRealm()
                    if authentication_url == None:
                        authentication_url = [
                            parsed_url.geturl() for parsed_url in missing_files
                        ]
                    password_manager.add_password(None, authentication_url,
                                                  username, password)
                    handler = HTTPBasicAuthHandler(password_manager)

                    # If no cookiejar was given, create a new one
                    if cookiejar == None:
                        cookiejar = CookieJar()

                    cookie_processor = HTTPCookieProcessor(cookiejar)

                    install_opener(build_opener(cookie_processor, handler))

                # Use a cookie with no username or password
                elif cookiejar != None:
                    cookie_processor = HTTPCookieProcessor(cookiejar)
                    install_opener(build_opener(cookie_processor))

            if use_progress_bar:
                missing_files_loop = tqdm(missing_files)
            else:
                missing_files_loop = missing_files

            for parsed_url in missing_files_loop:
                out_filename = generatePath(data_location, parsed_url)
                os.makedirs(os.path.split(out_filename)[0], exist_ok=True)

                with open(out_filename, 'a+b') as lockfile:

                    fcntl.lockf(lockfile, fcntl.LOCK_EX)
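                    # Intent of this lock plus the zero-length check below:
                    # only one process downloads a given file; a process that
                    # opens it after the download sees a non-empty file and
                    # skips it.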
                    lockfile.seek(0)
                    if len(lockfile.read(1)) == 0:

                        with atomic_write(out_filename,
                                          mode='wb',
                                          overwrite=True) as data_file:
                            if not use_requests:
                                shutil.copyfileobj(
                                    urlopen(parsed_url.geturl()), data_file)
                            else:
                                if username != None or password != None:
                                    # This method to download password protected data comes from
                                    # https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
                                    with requests.Session() as session:
                                        initial_request = session.request(
                                            'get', parsed_url.geturl())
                                        r = session.get(initial_request.url,
                                                        auth=(username,
                                                              password),
                                                        stream=True)

                                        if r.status_code == 401:
                                            raise RuntimeError(
                                                "Authorization Denied")

                                        shutil.copyfileobj(
                                            r.raw, data_file, 1024 * 1024 * 10)
                                else:
                                    with requests.Session() as session:
                                        r = session.get(parsed_url.geturl(),
                                                        stream=True)
                                        shutil.copyfileobj(
                                            r.raw, data_file, 1024 * 1024 * 10)

        # Return a list of file locations for parsing
        return [
            generatePath(data_location, parsed_url)
            for parsed_url in parsed_http_paths
        ]
Example #25
def save_config(config):
    ensure_parent_dir_exists(CONFIG_FILE)
    with atomic_write(CONFIG_FILE, mode='wb', overwrite=True) as f:
        f.write(toml.dumps(config).encode('utf-8'))
Example #26
 def write_empty(self, passphrase):
     with atomic_write(self.keys_path, mode='w', overwrite=False) as f:
         f.truncate()
     with atomic_write(self.store, mode='wb', overwrite=False) as f:
         f.write(_encrypt({}, passphrase))
Example #28
 def _save_token(token):
     checkdir(expand_path(os.path.dirname(token_file)), create=True)
     with atomic_write(token_file, mode='w', overwrite=True) as f:
         json.dump(token, f)
Example #29
def main():
    logger.info("start")

    # get filenames from openSNP data dump
    filenames = r.get_opensnp_datadump_filenames()

    filenames = [
        filename
        for filename in filenames
        if "readme" not in filename and "phenotype" not in filename
    ]

    # draw a sample from the observations
    random.seed(1)
    SAMPLE_SIZE = len(filenames)
    # SAMPLE_SIZE = 10
    samples = random.sample(range(len(filenames)), SAMPLE_SIZE)

    # setup tasks for parallelizing / execution on multiple cores
    p = Parallelizer(parallelize=True)
    tasks = [{"file": filenames[i]} for i in samples]

    # run tasks; results is a list of dicts
    results = p(load_file, tasks)

    # get results from `load_file` where `count` was non-zero
    rows = [item for item in results if "msg" not in item]

    df = pd.DataFrame(
        rows,
        columns=["file", "source", "build", "build_detected", "chromosomes", "count"],
    )

    save_df_as_csv(df, OUTPUT_DIR, "parse-opensnp-files.csv")

    # log parsing statistics
    file_count = len(filenames)
    logger.info(f"{file_count} files in the openSNP datadump")
    logger.info(f"{(len(df) / file_count):.2%} of openSNP datadump files parsed")
    logger.info(
        f"build detected in {len(df.loc[df.build_detected]) / len(df):.2%} of files parsed"
    )

    # extract files from the datadump where `load_file` returned a message
    if EXTRACT_FILES:
        # group files with same message (e.g., {"some message": ["file1", "file2"], ...})
        d = {}
        for result in results:
            if "msg" in result:
                if result["msg"] in d:
                    d[result["msg"]].append(result["file"])
                else:
                    d[result["msg"]] = [result["file"]]

        # add messages / file filters as necessary...
        d["build not detected"] = list(df.loc[~df.build_detected].file.values)

        # extract files that have messages for debugging
        for msg, files in d.items():
            if len(files) == 0:
                continue

            # create a directory for each message (prefix indicates number of files)
            path = os.path.join(OUTPUT_DIR, f"{len(files):04}_{clean_str(msg)}")
            create_dir(path)
            # save each file with message into created directory
            for filename in files:
                with atomic_write(os.path.join(path, filename), mode="wb") as f:
                    f.write(r.load_opensnp_datadump_file(filename))

    logger.info("stop")
Example #30
 def set_state(self, state: str):
     os.makedirs(os.path.dirname(self.filename), exist_ok=True)
     with atomic_write(self.filename, overwrite=True, encoding='utf-8') as f:
         f.write(state)
Example #31
def generate_cov(bam_file,
                 bam_index,
                 out,
                 threshold,
                 is_combined,
                 contig_threshold,
                 logger,
                 sep=None):
    """
    Call bedtools and generate coverage file

    bam_file: BAM file used
    out: output directory
    threshold: threshold for contigs that will be binned
    is_combined: whether the abundance feature is used in deep learning (True: use it)
    contig_threshold: threshold of contigs for must-link constraints
    sep: separator for multi-sample binning
    """
    import numpy as np
    logger.info('Processing `{}`'.format(bam_file))
    bam_name = os.path.split(bam_file)[-1] + '_{}'.format(bam_index)
    bam_depth = os.path.join(out, '{}_depth.txt'.format(bam_name))

    bed_p = subprocess.Popen(
        ['bedtools', 'genomecov', '-bga', '-ibam', bam_file],
        universal_newlines=True,
        stdout=subprocess.PIPE)

    if is_combined:
        contig_cov, must_link_contig_cov = calculate_coverage(
            bed_p.stdout,
            bam_file,
            threshold,
            is_combined=is_combined,
            sep=sep,
            contig_threshold=contig_threshold if sep is None else 1000,
            contig_threshold_dict=contig_threshold
            if sep is not None else None)
        if bed_p.wait() != 0:
            raise OSError("Failure in running bedtools")

        contig_cov = contig_cov.apply(lambda x: x + 1e-5)
        must_link_contig_cov = must_link_contig_cov.apply(lambda x: x + 1e-5)
        if sep is None:
            abun_scale = (contig_cov.mean() / 100).apply(np.ceil) * 100
            abun_split_scale = (must_link_contig_cov.mean() / 100).apply(
                np.ceil) * 100
            contig_cov = contig_cov.div(abun_scale)
            must_link_contig_cov = must_link_contig_cov.div(abun_split_scale)

        with atomic_write(os.path.join(out,
                                       '{}_data_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            contig_cov.to_csv(ofile)

        with atomic_write(os.path.join(
                out, '{}_data_split_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            must_link_contig_cov.to_csv(ofile)
    else:
        contig_cov = calculate_coverage(
            bed_p.stdout,
            bam_file,
            threshold,
            is_combined=is_combined,
            sep=sep,
            contig_threshold=contig_threshold if sep is None else 1000,
            contig_threshold_dict=contig_threshold
            if sep is not None else None)
        if bed_p.wait() != 0:
            raise OSError("Failure in running bedtools")

        contig_cov = contig_cov.apply(lambda x: x + 1e-5)

        with atomic_write(os.path.join(out,
                                       '{}_data_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            contig_cov.to_csv(ofile)

    return (bam_file, logger)
Example #32
 def _save_token(token):
     with atomic_write(token_file, mode='w', overwrite=True) as f:
         json.dump(token, f)
Example #33
 def set_meta(self, key, value):
     value = value or ''
     assert isinstance(value, str)
     fpath = os.path.join(self.path, key)
     with atomic_write(fpath, mode='wb', overwrite=True) as f:
         f.write(value.encode(self.encoding))
def __update_concordances(source, dest, to_process, **kwargs):
    
    to_update = {}

    source_fh = open(source, 'r')
    reader = csv.reader(source_fh)

    # First figure out the columns we've got

    cols = reader.next()

    # Next check to see if there are any new ones

    # Note that this does NOT attempt to check and see whether the files
    # in to_process actually have modified concordances. That is still not
    # a solved problem but beyond that it is a problem to solve elsewhere
    # in the stack. This assumes that by the time you pass a list of files
    # you've satisfied yourself that it's worth the processing time (not to
    # mention memory) to fill up `to_update` for all the files listed in
    # to_update. (20160107/thisisaaronland)

    for path in to_process:

        path = os.path.abspath(path)
        feature = mapzen.whosonfirst.utils.load_file(path)

        props = feature['properties']
        wofid = props['wof:id']

        concordances = props['wof:concordances']
        to_update[ wofid ] = concordances

        for src in concordances.keys():

            if not src in cols:
                cols.append(src)

    cols.sort()

    # Rewind the source concordances file so that we can create a dict reader

    source_fh.seek(0)
    reader = csv.DictReader(source_fh)

    writer = None

    with atomicwrites.atomic_write(dest, mode='wb', overwrite=True) as dest_fh:

        for row in reader:

            if not writer:
                writer = csv.DictWriter(dest_fh, fieldnames=cols)
                writer.writeheader()

            # See what we're doing here? If this is a record that's been
            # updated (see above for the nuts and bolts about how/where
            # we determine this) then we reassign it to `row`.

            wofid = row.get('wof:id')
            wofid = int(wofid)

            if to_update.get(wofid, False):
                row = to_update[wofid]
                row['wof:id'] = wofid

            out = {}

            # Ensure we have a value or "" for every src in cols

            for src in cols:
                out[ src ] = row.get(src, "")

            writer.writerow(out)
Example #35
    def set_meta(self, key, value):
        value = normalize_meta_value(value)

        fpath = os.path.join(self.path, key)
        with atomic_write(fpath, mode='wb', overwrite=True) as f:
            f.write(value.encode(self.encoding))
Example #36
            "build_detected",
            "x_snps",
            "heterozygous_x_snps",
            "y_snps",
            "y_snps_not_null",
            "count",
        ],
    )

    # derive the columns we want to analyze
    df["heterozygous_x_snps_ratio"] = df.heterozygous_x_snps / df.x_snps
    df["y_snps_not_null_ratio"] = df.y_snps_not_null / df.y_snps

    df.drop(df.loc[df["heterozygous_x_snps_ratio"].isna()].index, inplace=True)
    df.drop(df.loc[df["y_snps_not_null_ratio"].isna()].index, inplace=True)

    plt = create_analysis_plot(
        df[["heterozygous_x_snps_ratio", "y_snps_not_null_ratio"]])

    # save output
    with atomic_write(
            f"{os.path.join(OUTPUT_DIR, 'xy-chrom-snp-ratios.png')}",
            mode="wb",
            overwrite=True,
    ) as f:
        plt.savefig(f)

    save_df_as_csv(df, OUTPUT_DIR, "xy-chrom-snp-ratios.csv")

    logger.info("stop")
Example #37
 def _save_token(token):
     checkdir(expand_path(os.path.dirname(token_file)), create=True)
     with atomic_write(token_file, mode='w', overwrite=True) as f:
         json.dump(token, f)
Example #38
 def __call__(self, path, mode):
     return atomic_write(path, mode=mode, overwrite=self._overwrite)
Example #39
def on_phrase(j):
    phrase = parse_phrase(j.get("phrase", []))
    cmd = j["cmd"]
    if cmd == "p.end" and phrase:
        with atomic_write(path, overwrite=True) as f:
            f.write(phrase)
Example #40
 def _set_key(self, content):
     checkdir(os.path.dirname(self._key_path), create=True)
     with atomicwrites.atomic_write(self._key_path, mode='wb') as f:
         f.write(content)
     assert_permissions(self._key_path, 0o600)
Example #41
def ensure_image_loaded(image_name, image_url, cache_dir):
    """Pull docker image into local repo."""

    # check if image is in local docker repo
    try:
        registry = app.conf.get("CONTAINER_REGISTRY", None)
        # Custom edit to load image from registry
        try:
            if registry is not None:
                logger.info(
                    "Trying to load docker image {} from registry '{}'".format(
                        image_name, registry))
                registry_url = os.path.join(registry, image_name)
                logger.info("docker pull {}".format(registry_url))
                check_output(['docker', 'pull', registry_url])
                logger.info("docker tag {} {}".format(registry_url, image_name))
                check_output(['docker', 'tag', registry_url, image_name])
        except Exception as e:
            logger.warn(
                "Unable to load docker image from registry '{}': {}".format(
                    registry, e))

        image_info = check_output(['docker', 'inspect', image_name])
        logger.info("Docker image %s cached in repo" % image_name)
    except:
        logger.info("Failed to inspect docker image %s" % image_name)

        # pull image from url
        if image_url is not None:
            image_file = os.path.join(cache_dir, os.path.basename(image_url))
            if not os.path.exists(image_file):
                logger.info("Downloading image %s (%s) from %s" %
                            (image_file, image_name, image_url))
                try:
                    osaka.main.get(image_url, image_file)
                except Exception as e:
                    raise RuntimeError("Failed to download image {}:\n{}".format(image_url, str(e)))
                logger.info("Downloaded image %s (%s) from %s" %
                            (image_file, image_name, image_url))
            load_lock = "{}.load.lock".format(image_file)
            try:
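                # atomic_write() defaults to overwrite=False, so committing the
                # lock file raises OSError(EEXIST) if another worker created it
                # first; that case is handled below (errno 17) by waiting for
                # the image to finish loading.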
                with atomic_write(load_lock) as f:
                    f.write("%sZ\n" % datetime.utcnow().isoformat())
                logger.info("Loading image %s (%s)" % (image_file, image_name))
                p = Popen(['docker', 'load', '-i', image_file],
                          stderr=PIPE, stdout=PIPE)
                stdout, stderr = p.communicate()
                if p.returncode != 0:
                    raise RuntimeError("Failed to load image {} ({}): {}".format(image_file, image_name, stderr.decode()))
                logger.info("Loaded image %s (%s)" % (image_file, image_name))
                try:
                    os.unlink(image_file)
                except:
                    pass
                try:
                    os.unlink(load_lock)
                except:
                    pass
            except OSError as e:
                if e.errno == 17:
                    logger.info("Waiting for image %s (%s) to load" %
                                (image_file, image_name))
                    inspect_image(image_name)
                else:
                    raise
        else:
            # pull image from docker hub
            logger.info("Pulling image %s from docker hub" % image_name)
            check_output(['docker', 'pull', image_name])
            logger.info("Pulled image %s from docker hub" % image_name)
        image_info = check_output(['docker', 'inspect', image_name])
    logger.info("image info for %s: %s" % (image_name, image_info.decode()))
    return json.loads(image_info)[0]
def _dump(obj, abspath, serializer_type,
          dumper_func=None,
          compress=True,
          overwrite=False,
          verbose=False,
          **kwargs):
    """Dump object to file.

    :param abspath: The file path you want dump to.
    :type abspath: str

    :param serializer_type: 'binary' or 'str'.
    :type serializer_type: str

    :param dumper_func: A dumper function that takes an object as input, return
        binary or string.
    :type dumper_func: callable function

    :param compress: default ``True``. If True, then compress binary.
    :type compress: bool

    :param overwrite: default ``False``, If ``True``, when you dump to
      existing file, it silently overwrite it. If ``False``, an alert
      message is shown. Default setting ``False`` is to prevent overwrite
      file by mistake.
    :type overwrite: boolean

    :param verbose: default ``False``, help-message-display trigger.
    :type verbose: boolean
    """
    _check_serializer_type(serializer_type)

    if not inspect.isfunction(dumper_func):
        raise TypeError("dumper_func has to be a function take object as input "
                        "and return binary!")

    prt_console("\nDump to '%s' ..." % abspath, verbose)
    if os.path.exists(abspath):
        if not overwrite:
            prt_console(
                "    Stop! File exists and overwrite is not allowed",
                verbose,
            )
            return

    st = time.perf_counter()

    b_or_str = dumper_func(obj, **kwargs)
    if serializer_type == "str":
        b = b_or_str.encode("utf-8")
    else:
        b = b_or_str

    if compress:
        b = zlib.compress(b)

    with atomic_write(abspath, overwrite=overwrite, mode="wb") as f:
        f.write(b)

    elapsed = time.perf_counter() - st
    prt_console("    Complete! Elapsed %.6f sec." % elapsed, verbose)

    if serializer_type == "str":
        return b_or_str
    else:
        return b
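
A hedged usage sketch for _dump() above (the dumper and target path are illustrative; _check_serializer_type and prt_console are assumed to be provided by the surrounding module):

import json

def json_dumper(obj, **kwargs):
    # a plain Python function, satisfying the inspect.isfunction() check above
    return json.dumps(obj, **kwargs)

# Serialize a dict to a zlib-compressed file, refusing to overwrite an existing one.
_dump({'a': 1}, '/tmp/obj.json.zlib', 'str',
      dumper_func=json_dumper, compress=True, overwrite=False, verbose=True)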
Example #43
 def _save(self):
     with atomic_write(self._filename, overwrite=True) as f:
         for entry in self._entries:
             line = entry.to_line()
             f.write(line)
Example #44
 def _set_auth_token(self, token):
     checkdir(os.path.dirname(self._auth_token_path), create=True)
     with atomicwrites.atomic_write(self._auth_token_path) as f:
         f.write(token)
     assert_permissions(self._auth_token_path, 0o600)
Example #45
 def write(self, where, content):
     with atomic_write(path=where, overwrite=True, mode='wb') as f:
         f.write(pickle.dumps(content))
Example #46
 def _set_key(self, content):
     checkdir(os.path.dirname(self._key_path), create=True)
     with atomicwrites.atomic_write(self._key_path, mode='wb') as f:
         f.write(content)
     assert_permissions(self._key_path, 0o600)
Example #47
    def write(self, write_cb):
        assert self._lock.is_locked

        with atomic_write(self._path, overwrite=True) as f:
            write_cb(f)
Example #48
 def _set_auth_token(self, token):
     checkdir(os.path.dirname(self._auth_token_path), create=True)
     with atomicwrites.atomic_write(self._auth_token_path) as f:
         f.write(token)
     assert_permissions(self._auth_token_path, 0o600)
Example #49
def main(args=None):
    import shlex

    start = datetime.datetime.now()

    if args is None:
        args = sys.argv

    # Save it for later
    command_line = ' '.join([shlex.quote(a) for a in args])

    args = parse_args(args)
    validate_args(args)

    out = args.output
    if not os.path.exists(out):
        os.makedirs(out)

    with tempfile.TemporaryDirectory() as tmpdirname:
        try:
            if args.genome_fasta is not None:
                gene_prediction(args.genome_fasta, out, tmpdirname)
                args.nt_input = out + '/prodigal_out.fna'
                args.aa_input = out + '/prodigal_out.faa'

            if args.nt_input is not None:
                n_nt = number_seqs_fafile(args.nt_input)
                n_aa = number_seqs_fafile(args.aa_input)
                if n_nt != n_aa:
                    sys.stderr.write(
                        f"The input DNA and amino acid files must have the same number of sequences!\n"
                    )
                    sys.stderr.write(
                        f"DNA file has {n_nt} while amino acid file has {n_aa}!"
                    )
                    sys.exit(1)
                split_file(args.nt_input,
                           output_dir=tmpdirname + '/split_file',
                           is_dna=True)

            num_split = split_file(args.aa_input,
                                   output_dir=tmpdirname + '/split_file',
                                   is_dna=False)
            hit_table = []
            gene_table = []
            print(
                'Starting GMGC queries (total: {} batches to process)'.format(
                    num_split))
            for index in tqdm(range(num_split)):
                besthit = query_gmgc(tmpdirname +
                                     '/split_file/split_{}.faa'.format(index +
                                                                       1))
                if besthit is not None:
                    besthit = json.loads(bytes.decode(
                        besthit.content))['results']
                    nt_split = None
                    if args.nt_input is not None:
                        nt_split = tmpdirname + '/split_file/split_{}.fna'.format(
                            index + 1)
                    aa_split = tmpdirname + '/split_file/split_{}.faa'.format(
                        index + 1)
                    hit_table_index, gene_inf = realignment(
                        nt_split, aa_split, besthit)
                    hit_table.extend(hit_table_index)
                    gene_table.extend(gene_inf)
            hit_table = pd.DataFrame(hit_table)
            hit_table.columns = [
                'query_name', 'unigene_id', 'align_category', 'gene_dna',
                'gene_protein'
            ]

            gene_table = pd.DataFrame(gene_table)
            gene_table.columns = [
                'unigene_id', 'sample', 'longitude', 'latitude', 'habitat'
            ]
            num_gene = hit_table.shape[0]

            summary = []
            summary.append('*' * 30 + 'GMGC-mapper results summary table' +
                           '*' * 30)
            summary.append('- Processed {} genes'.format(num_gene))
            match_result = hit_table['align_category'].value_counts().to_dict()
            if 'EXACT' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 95% nucleotide identity with at least 95% coverage'
                    .format(match_result['EXACT'],
                            match_result['EXACT'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 95% nucleotide identity with at least 95% coverage'
                )

            if 'SIMILAR' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 80% nucleotide identity with at least 80% coverage'
                    .format(match_result['SIMILAR'],
                            match_result['SIMILAR'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 80% nucleotide identity with at least 80% coverage'
                )

            if 'MATCH' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 50% nucleotide identity with at least 50% coverage'
                    .format(match_result['MATCH'],
                            match_result['MATCH'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 50% nucleotide identity with at least 50% coverage'
                )

            no_match = match_result.get('NO MATCH', 0.0) + match_result.get(
                'NO HIT', 0.0)
            if no_match:
                summary.append(
                    ' -{0} ({1:.1%}) had no match in the GMGC'.format(
                        no_match, no_match / num_gene))

            genome_bin = query_genome_bin(hit_table)
            genome_bin = genome_bin.sort_values('nr_hits', ascending=False)
            summary.append('\n\n' + '*' * 30 +
                           'GMGC-mapper results genome_bin summary' +
                           '*' * 30 + '\n')

            num_hitting = genome_bin['nr_hits'].values
            summary.append('{} bins were reported for >50% of genes'.format(
                np.sum(num_hitting > num_gene * 0.5)))
            summary.append('{} bins were reported for >25% of genes'.format(
                np.sum(num_hitting > num_gene * 0.25)))
            summary.append('{} bins were reported for >10% of genes'.format(
                np.sum(num_hitting > num_gene * 0.1)))

            with atomic_write(out + '/genome_bin.tsv',
                              overwrite=True) as ofile:
                ofile.write(
                    '# Genome_bin from GMGC-mapper v{}\n'.format(__version__))
                genome_bin.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/hit_table.tsv', overwrite=True) as ofile:
                ofile.write(
                    '# Results from GMGC-mapper v{}\n'.format(__version__))
                hit_table.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/gene_table.tsv',
                              overwrite=True) as ofile:
                ofile.write('# Gene information from GMGC-mapper v{}\n'.format(
                    __version__))
                gene_table.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/summary.txt', overwrite=True) as ofile:
                for s in summary:
                    print(s)
                    ofile.write(s + '\n')

            output_content = resource_string(__name__, 'output.md')
            with atomic_write(out + '/README.md', overwrite=True) as ofile:
                ofile.write(bytes.decode(output_content))

            end = datetime.datetime.now()

            run_metadata = {
                'Command_line': command_line,
                'GMGC-mapper': __version__,
                'Working directory': os.getcwd(),
                'Start time': str(start),
                'End time': str(end),
                'Run time': (end - start).seconds,
                'Inputs': [],
            }

            if args.genome_fasta is not None:
                run_metadata['Inputs'].append(
                    {'genome_input': input_metadata(args.genome_fasta)})
            if args.nt_input is not None:
                run_metadata['Inputs'].append(
                    {'nt_input': input_metadata(args.nt_input)})
            if args.aa_input is not None:
                run_metadata['Inputs'].append(
                    {'aa_input': input_metadata(args.aa_input)})

            with atomic_write(out + '/runlog.yaml', overwrite=True) as ofile:
                yaml.dump(run_metadata, ofile, default_flow_style=False)
        except Exception as e:
            sys.stderr.write('GMGC-mapper Error: ')
            sys.stderr.write(str(e))
            sys.stderr.write('\n')
            sys.exit(1)
Example #50
File: vdir.py Project: pdav/khal
 def set_meta(self, key, value):
     value = value or ''
     assert isinstance(value, str)
     fpath = os.path.join(self.path, key)
     with atomic_write(fpath, mode='wb', overwrite=True) as f:
         f.write(value.encode(self.encoding))
Example #51
    def download_example_datasets(self):
        """ Download example datasets from `openSNP <https://opensnp.org>`_.

        Per openSNP, "the data is donated into the public domain using `CC0 1.0
        <http://creativecommons.org/publicdomain/zero/1.0/>`_."

        Returns
        -------
        paths : list of str or empty str
            paths to example datasets

        References
        ----------
        1. Greshake B, Bayer PE, Rausch H, Reda J (2014), "openSNP-A Crowdsourced Web Resource
           for Personal Genomics," PLOS ONE, 9(3): e89204,
           https://doi.org/10.1371/journal.pone.0089204
        """
        paths = []
        paths.append(
            self._download_file(
                "https://opensnp.org/data/662.23andme.304",
                "662.23andme.304.txt.gz",
                compress=True,
            )
        )
        paths.append(
            self._download_file(
                "https://opensnp.org/data/662.23andme.340",
                "662.23andme.340.txt.gz",
                compress=True,
            )
        )
        paths.append(
            self._download_file(
                "https://opensnp.org/data/662.ftdna-illumina.341",
                "662.ftdna-illumina.341.csv.gz",
                compress=True,
            )
        )
        paths.append(
            self._download_file(
                "https://opensnp.org/data/663.23andme.305",
                "663.23andme.305.txt.gz",
                compress=True,
            )
        )

        # these two files consist of concatenated gzip files and therefore need special handling
        paths.append(
            self._download_file(
                "https://opensnp.org/data/4583.ftdna-illumina.3482",
                "4583.ftdna-illumina.3482.csv.gz",
            )
        )
        paths.append(
            self._download_file(
                "https://opensnp.org/data/4584.ftdna-illumina.3483",
                "4584.ftdna-illumina.3483.csv.gz",
            )
        )

        for gzip_path in paths[-2:]:
            # https://stackoverflow.com/q/4928560
            # https://stackoverflow.com/a/37042747
            with open(gzip_path, "rb") as f:
                decompressor = zlib.decompressobj(31)

                # decompress data from first concatenated gzip file
                data = decompressor.decompress(f.read())

                if len(decompressor.unused_data) > 0:
                    # decompress data from second concatenated gzip file, if any
                    additional_data = zlib.decompress(decompressor.unused_data, 31)
                    data += additional_data[33:]  # skip over second header

            # recompress data
            with atomic_write(gzip_path, mode="wb", overwrite=True) as f:
                self._write_data_to_gzip(f, data)

        return paths
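The handling above is tailored to these two openSNP files: it assumes exactly two concatenated gzip members and drops the first 33 bytes of the second member's decompressed output (a repeated header). A more general sketch (the helper name is hypothetical, not part of snps) keeps feeding `zlib.decompressobj`'s `unused_data` into a fresh decompressor until nothing remains, and keeps every member's contents verbatim:

import zlib

def decompress_all_gzip_members(path):
    """Decompress a file that may contain several concatenated gzip members."""
    chunks = []
    with open(path, "rb") as f:
        remaining = f.read()
    while remaining:
        # wbits=31 selects the gzip wrapper with the maximum window size
        d = zlib.decompressobj(31)
        chunks.append(d.decompress(remaining))
        remaining = d.unused_data  # bytes belonging to the next member, if any
    return b"".join(chunks)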
Example #52
def dump_debug_info(praw_object,
                    exc=None,
                    paste_key=None,
                    xml=None,
                    extra_data={},
                    dir="error",
                    build=None):
    '''
	if not isinstance(praw_object, praw_object_wrapper_t):
		raise ValueError("dump_debug_info was passed an invalid praw_object: {}".format(type(praw_object)))
	'''

    if not (paste_key is None or isinstance(paste_key, str)):
        raise ValueError(
            "dump_debug_info was passed an invalid paste_key: {}".format(
                type(paste_key)))

    id = praw_object.id

    if not os.path.exists(dir):
        os.makedirs(dir)

    if not os.path.exists("{}/{}".format(dir, id)):
        os.makedirs("{}/{}".format(dir, id))
    '''
	if xml is None and isinstance(paste_key, str):
		try:
			c = get_url_data("http://pastebin.com/raw/" + paste_key)
			c = c.replace("-", "+").replace("_", "/")
			xml = pastebin.decode_base64_and_inflate(c)
		except urllib2.URLError as e:
			logging.error("An exception occurred when attempting to fetch xml for debug dump.")
	'''

    if xml is not None:
        if isinstance(xml, ET.ElementTree):
            xml = xml.getroot()

        xml_str = ET.tostring(xml).decode()

        if not isinstance(xml_str, str):
            raise ValueError(
                "dump_debug_info was passed invalid xml: is not string or coercable to string"
            )

        with open("{}/{}/pastebin.xml".format(dir, id), "w") as f:
            f.write(xml_str)

    data = {}

    if exc is not None:
        data['error_text'] = repr(exc)

    if paste_key is not None:
        data['pastebin_url'] = "http://pastebin.com/raw/{}".format(paste_key)

    if praw_object is not None:
        if isinstance(praw_object, praw.models.Comment):
            data['type'] = "comment"
        else:
            data['type'] = "submission"

        data['url'] = praw_object.permalink

    if build is not None:
        if hasattr(build, 'passives_by_name'):
            data['passives'] = build.passives_by_name

            for key, val in list(data['passives'].items()):
                if not isinstance(key, str):
                    logging.warning(
                        "WARNING: {} passive key overriden to 'NONE'.".format(
                            key))
                    data['passives']['NONE'] = val
                    del data['passives'][key]

    data.update(extra_data)

    with atomic_write("{}/{}/info.json".format(dir, id), overwrite=True) as f:
        json.dump(data, f, sort_keys=True, indent=4)

    if exc is not None:
        with open("{}/{}/traceback.txt".format(dir, id), "w") as f:
            traceback.print_exc(file=f)

    logging.info("Dumped info to {}/{}/".format(dir, id))
Example #53
async def dir_task(limiter, session, path, consul_path):
    path.mkdir(exist_ok=True)

    prev_items = {str(i.relative_to(path)) for i in path.rglob("*")}

    index = 0

    while True:
        if index:
            index_q = f"&index={index}"
        else:
            index_q = ""

        logger.debug("dir_task: get: path=%s index=%s ...", consul_path, index)

        try:
            await limiter.acquire()
            async with session.get(
                    f"{CONSUL_ADDR}/v1/kv/{consul_path}?recurse=true{index_q}",
            ) as r:
                logger.debug("dir_task: ... get: path=%s status=%s",
                             consul_path, r.status)

                new_index = int(r.headers["X-Consul-Index"])
                if new_index < index:
                    index = 0
                    logger.warning("dir_task: path=%s: resetting index",
                                   consul_path)
                else:
                    index = new_index

                if r.status != 200:
                    continue

                j = await r.json()
        except (
                aiohttp.client_exceptions.ClientConnectorError,
                aiohttp.client_exceptions.ServerDisconnectedError,
                asyncio.exceptions.TimeoutError,
        ) as e:
            logger.warning("dir_task: consul server: %s", e)
            continue

        items = {
            i["Key"][len(consul_path) + 1:]: base64.b64decode(i["Value"])
            for i in j
        }

        for k, v in items.items():
            logger.debug("dir_task: path=%s: writing '%s'", consul_path, k)

            fpath = path / k

            with atomicwrites.atomic_write(fpath, mode="wb",
                                           overwrite=True) as f:
                f.write(v)

            fpath.chmod(0o444)

            prev_items.discard(k)

        for i in prev_items:
            logger.debug("dir_task: path=%s: deleting '%s'", consul_path, i)
            path.joinpath(i).unlink()

        prev_items = set(items)
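Since dir_task loops forever, it has to be driven by an event loop. The launcher below is only a sketch of one plausible wiring: the Limiter stand-in, mirror directory, and Consul key prefix are assumptions, and it presumes dir_task plus its CONSUL_ADDR and logger globals are importable from the module above.

import asyncio
import pathlib

import aiohttp

class Limiter:
    """Stand-in rate limiter: pause briefly before each Consul request."""
    async def acquire(self):
        await asyncio.sleep(1)

async def main():
    # Blocking queries against /v1/kv can hang for a long time, so use a generous timeout.
    timeout = aiohttp.ClientTimeout(total=600)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        await dir_task(Limiter(), session, pathlib.Path("consul-mirror"), "config/myapp")

if __name__ == "__main__":
    asyncio.run(main())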
Example #54
def _write_param(key, value):
    param_path = os.path.join(PARAMS_DIR, key)
    with atomic_write(param_path, overwrite=True) as f:
        f.write(json.dumps(value))
    os.chmod(param_path, 0o666)
Example #55
			if playlist['name'] == "Discover Weekly":
				continue

			print(playlist['name'])
			print('  total tracks', playlist['tracks']['total'])
			results = sp.user_playlist(username, playlist['id'], fields="tracks,next")
			deeztracks = []
			tracks = results['tracks']
			deeztracks += tracks['items']
			show_tracks(tracks)
			while tracks['next']:
				tracks = sp.next(tracks)
				deeztracks += tracks['items']
				show_tracks(tracks)
			trackery += [deeztracks]

		random.shuffle(trackery)
		flattrack = [track for album in trackery for track in album]

		pp = pprint.PrettyPrinter(indent=4)

		print("Creating . . .")
		pigl = sp.user_playlist_create(username, "Shuffle!", public=False)
		for offset in range(0, len(flattrack), 50):
			sp.user_playlist_add_tracks(username, pigl['id'], [track['track']['id'] for track in flattrack[offset:offset+50]])
	else:
		print("Can't get token for", username)

with atomic_write('cache.yaml', overwrite=True) as f:
	yaml.dump(cache, f)
Example #56
    def get_next_statement(self,
                           existing_statements=set(),
                           downloaded_statements=set()):
        pay_history, = self.wait_and_return(
            lambda: self.find_element_in_any_frame(
                By.PARTIAL_LINK_TEXT, "Pay History", only_displayed=True))
        pay_history.click()

        def get_statement_table():
            try:
                for table in self.find_elements_in_any_frame(
                        By.TAG_NAME, 'table', only_displayed=True):
                    headings = [
                        x.text.strip()
                        for x in table.find_elements_by_xpath('thead/tr/th')
                    ]
                    if 'Pay Date' in headings and 'Document Number' in headings:
                        return table
            except:
                import traceback
                traceback.print_exc()

        table, = self.wait_and_return(get_statement_table)
        date_format = '%m/%d/%Y'
        for row in table.find_elements_by_xpath('tbody/tr'):
            row_text = [
                x.text.strip() for x in row.find_elements_by_tag_name('td')
            ]
            row_text = [x for x in row_text if x]
            pay_date = row_text[0]
            document_number = row_text[1]
            assert re.fullmatch('[0-9A-Z]+', document_number), document_number
            pay_date = datetime.datetime.strptime(pay_date, date_format).date()
            document_str = 'Document %r : %r' % (pay_date, document_number)
            if (pay_date, document_number) in existing_statements:
                logger.info('  Found in existing')
                continue
            if (pay_date, document_number) not in downloaded_statements:
                logger.info('%s:  Downloading', document_str)
                link = row.find_element_by_tag_name('a')
                link.click()
                download_link, = self.wait_and_return(
                    lambda: self.find_element_in_any_frame(
                        By.XPATH,
                        '//input[@type="image" and contains(@title, "Download")]'
                    ))
                download_link.click()
                logger.info('%s: Waiting to get download', document_str)
                download_result, = self.wait_and_return(
                    self.get_downloaded_file)
                name, data = download_result
                if len(data) < 5000:
                    raise RuntimeError('Downloaded file size is invalid: %d' %
                                       len(data))
                output_name = '%s.statement-%s.pdf' % (
                    pay_date.strftime('%Y-%m-%d'), document_number)
                output_path = os.path.join(self.output_directory, output_name)
                with atomic_write(output_path, mode='wb') as f:
                    f.write(data)
                downloaded_statements.add((pay_date, document_number))
                return True
            else:
                logger.info('%s: Just downloaded', document_str)
        return False
Example #57
 def _save_token(token):
     with atomic_write(token_file, mode='w', overwrite=True) as f:
         json.dump(token, f)
Example #58
def save_df_as_csv(
    df, path, filename, comment="", prepend_info=True, atomic=True, **kwargs
):
    """ Save dataframe to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe to save
    path : str
        path to directory where to save CSV file
    filename : str or buffer
        filename for file to save or buffer to write to
    comment : str
        header comment(s); one or more lines starting with '#'
    prepend_info : bool
        prepend file generation information as comments
    atomic : bool
        atomically write output to a file on local filesystem
    **kwargs
        additional parameters to `pandas.DataFrame.to_csv`

    Returns
    -------
    str or buffer
        path to saved file or buffer (empty str if error)
    """
    buffer = False
    if isinstance(filename, io.IOBase):
        buffer = True

    if isinstance(df, pd.DataFrame) and len(df) > 0:
        if not buffer and not create_dir(path):
            return ""

        if buffer:
            destination = filename
        else:
            destination = os.path.join(path, filename)
            logger.info("Saving {}".format(os.path.relpath(destination)))

        if prepend_info:
            s = (
                "# Generated by snps v{}, https://pypi.org/project/snps/\n"
                "# Generated at {} UTC\n".format(
                    snps.__version__,
                    datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
                )
            )
        else:
            s = ""

        s += comment

        if "na_rep" not in kwargs:
            kwargs["na_rep"] = "--"

        if buffer:
            destination.write(s)
            df.to_csv(destination, **kwargs)
            destination.seek(0)
        elif atomic:
            with atomic_write(destination, mode="w", overwrite=True) as f:
                f.write(s)
                # https://stackoverflow.com/a/29233924
                df.to_csv(f, **kwargs)
        else:
            with open(destination, mode="w") as f:
                f.write(s)
                df.to_csv(f, **kwargs)

        return destination
    else:
        logger.warning("no data to save...")
        return ""
Example #59
    def set_meta(self, key, value):
        value = normalize_meta_value(value)

        fpath = os.path.join(self.path, key)
        with atomic_write(fpath, mode='wb', overwrite=True) as f:
            f.write(value.encode(self.encoding))
Example #60
 def save_cache_file(self, data):
     with atomic_write(self.cache_file, overwrite=True) as cache_file:
         json.dump(data, cache_file)