Пример #1
0
    def master(self):
        """Provision the master node of the cluster.

        Installs s3contents, points Jupyter's contents manager at this
        store's S3 bucket, starts an ipcontroller daemon, copies its
        connection files to the local machine, and finally launches the
        notebook server.

        NOTE(review): relies on fabric-style remote helpers (run, write,
        daemon, local, wait_for_file, get, sudo) defined elsewhere in
        this project — presumably executing against the remote host.
        """
        # Install s3contents to read notebooks from S3
        # run('pip install s3contents')
        # Installs from GitHub master instead of PyPI (the commented-out
        # line above is the PyPI variant).
        run('pip install https://github.com/danielfrg/s3contents/archive/master.zip'
            )
        # Write the Jupyter config: S3-backed contents manager, env-based
        # kernel discovery, raised iopub rate limit, KMS server-side
        # encryption. {bucket} is filled from this store's name.
        write(
            '/home/ubuntu/.jupyter/jupyter_notebook_config.py', '''
from s3contents import S3ContentsManager
c = get_config()
# Use existing config
c.NotebookApp.kernel_spec_manager_class = "environment_kernels.EnvironmentKernelSpecManager"
c.NotebookApp.iopub_data_rate_limit = 10000000000
# Tell Jupyter to use S3ContentsManager for all storage.
c.NotebookApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket = "{bucket}"
c.S3ContentsManager.sse = "aws:kms"
'''.format(bucket=self.store.name))
        # Run ipcontroller
        daemon('ipcontroller', 'ipcontroller --ip="*"')
        local('mkdir -p ~/.ipython/profile_default/security/')
        # Block until ipcontroller has written its connection files, then
        # copy them to the local profile so local clients/engines can connect.
        wait_for_file(
            '/home/ubuntu/.ipython/profile_default/security/ipcontroller-client.json'
        )
        wait_for_file(
            '/home/ubuntu/.ipython/profile_default/security/ipcontroller-engine.json'
        )
        get(
            '/home/ubuntu/.ipython/profile_default/security/ipcontroller-client.json',
            '~/.ipython/profile_default/security/ipcontroller-client.json')
        get(
            '/home/ubuntu/.ipython/profile_default/security/ipcontroller-engine.json',
            '~/.ipython/profile_default/security/ipcontroller-engine.json')
        # Token disabled — access control is assumed to happen elsewhere
        # (e.g. network level); NOTE(review): confirm this is intended.
        daemon('notebook', 'jupyter notebook --ip="*" --NotebookApp.token=""')
        sudo('ipcluster nbextension enable')
 def store_remote(self, dest_file, content, manipulate):
     """Store *content* at the remote location for *dest_file*.

     The file is (re)written only when it does not yet exist or its
     current remote content differs; *manipulate* is then applied to the
     resulting path unconditionally, and the file is recorded in the
     file list.

     NOTE(review): exists, mkdir_p, dirname, write and the helpers on
     self are defined elsewhere in this project.
     """
     usable_path = self.in_remote_root(dest_file)
     # Skip the write when the remote copy already matches *content*.
     if not exists(usable_path)\
             or self.all_object.get_remote(dest_file) != content:
         mkdir_p(dirname(usable_path))
         write(usable_path, content)
     # XXX: I suspect that ^this and immediate power off of the target
     # system led to truncation of some affected files to length 0!
     manipulate.execute(usable_path)
     self.append_to_file_list(dest_file)
Пример #3
0
def run_mon_jas(sub_dir):
    """Run the Mon-Jasnow sampler for every (size, temperature) pair and
    write the estimates plus per-size wall-clock timings under *sub_dir*.

    Relies on module-level names: Ns, Ts, sizes, temps, n, equib,
    Mon_Jasnow, Bar, time, write.
    """
    estimates = np.empty((sizes, temps))
    elapsed = np.zeros(sizes)
    progress = Bar("sampling", max=temps * sizes)
    for size_idx, N in enumerate(Ns):
        for temp_idx, T in enumerate(Ts):
            started = time()
            estimates[size_idx, temp_idx] = Mon_Jasnow(N, T, n, equib)
            progress.next()
            # Accumulate total sampling time per lattice size.
            elapsed[size_idx] += time() - started
    progress.finish()

    write(estimates, sub_dir, "tau_MJ")
    write(elapsed, sub_dir, "times_MJ")
Пример #4
0
    def test_values(self):
        """Round-trip check: write self.data with utilities.write, then
        verify selected cells of the produced CSV.

        csv.DictReader yields every field as a string, hence the string
        comparisons for 'iso3'/'year' and the float() casts for 'y'.
        """
        utilities.write('test_write.csv', self.data)

        # Use a context manager so the file handle is closed deterministically
        # (the original leaked the open file object).
        with open('test_write.csv') as csv_file:
            rows = list(csv.DictReader(csv_file))

        # assertEqual reports both values on failure, unlike assertTrue(==).
        self.assertEqual(rows[0]['iso3'], 'USA')
        self.assertEqual(rows[1]['iso3'], 'CAN')
        self.assertEqual(rows[2]['iso3'], 'MEX')

        self.assertEqual(rows[0]['year'], '1970')
        self.assertEqual(rows[1]['year'], '1971')
        self.assertEqual(rows[2]['year'], '1972')

        self.assertEqual(float(rows[0]['y']), .1)
        self.assertEqual(float(rows[1]['y']), .2)
        # Missing value round-trips as the empty string.
        self.assertEqual(rows[2]['y'], '')
Пример #5
0
def simulate_data(out_dir, y, se, gold_standard_file, design_file):
    """
    Simulate data by knocking out and adding noise to a gold standard file. How data is knocked out and
    how noise is added is determined by parameters specified in the design file.

    Parameters
    ----------
    out_dir : string
        The path to a directory in which to output the noisy and knocked out data. The path should end with a /
    y : string
        The column name in the gold standard file corresponding to the response to be knocked out and noised up.
    se : string
        The column name in the gold standard file corresponding to the standard error of the response.
        If se == '', then a se variable will be created named 'se' and filled with 0's. If noise is added, then
        this se variable will be set to the standard error of the noise.
    gold_standard_file : string
        The path to a csv.
    design_file : string
        The path to a csv. If a knock out test is to be performed, there must be a column called knockerouters.
        If noise is to be added, there must be a column called noisers. If there is a column called rep, then
        each test will be repeated rep times. If no such column is provided, each test will only be run once.
        All other columns are parameters for the knockerouter function or the noiser function. These two functions
        must not not share column names for parameters. All column entries (not the header) must be enclosed in
        double quotes. A string will be enclosed in single quotes and then double quotes (e.g. \"'USA'\"), whereas
        a number or an array will be enclosed only in double quotes (e.g. \"2\", \"[1,2,3]\").

    See Also
    --------
    utilities.read
    """

    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    gold_data = utilities.read(gold_standard_file)

    if se == "":
        # No standard-error column supplied: create one filled with zeros.
        gold_data = numpy.lib.recfunctions.append_fields(gold_data, "se", [0] * len(gold_data), "<f4")
        se = "se"

    reader = csv.reader(open(design_file))

    # Strip the directory part of the gold-standard path once, up front.
    # (The original re-split the path inside the loop, rebinding the
    # parameter on every repetition — idempotent but wasteful.)
    gold_base = gold_standard_file.split("/")[len(gold_standard_file.split("/")) - 1]

    on_header = True
    index = 0
    rep_index = np.nan
    for row in reader:
        if on_header:
            header = row
            on_header = False

            for i in range(0, len(header)):
                # BUG FIX: the original compared the whole header list to
                # "rep" (always False), so a rep column was never detected
                # and every test ran exactly once regardless of the design.
                if header[i] == "rep":
                    rep_index = i
        else:
            if utilities.is_nan(rep_index):
                reps = 1
            else:
                reps = int(row[rep_index])

            for i in range(0, reps):
                data = gold_data

                # SECURITY NOTE: eval() on design-file cells executes
                # arbitrary code — only feed trusted design files in here.
                row_dict = {}
                for j, name in enumerate(header):
                    row_dict[name] = eval(row[j])

                # Apply each configured transformation family in order.
                for func_collection in ["knockerouters", "noisers", "biasers"]:
                    # has_key() replaced with `in` (Python 3 compatible).
                    if func_collection in row_dict:
                        fun_str = func_collection + "." + row_dict[func_collection]
                        # biasers take (data, y, ...); the others also take se.
                        if func_collection == "biasers":
                            fun_str = fun_str + "(data, y"
                        else:
                            fun_str = fun_str + "(data, y, se"

                        # Discover the function's extra parameters and pull
                        # their values from the design row.
                        get_args_str = "inspect.getargspec(" + func_collection + "." + row_dict[func_collection] + ")"

                        args = eval(get_args_str)[0]
                        for arg in args:
                            if arg not in ["data", "y", "se"]:
                                fun_str = fun_str + ", row_dict['" + arg + "']"
                        fun_str = fun_str + ")"

                        data = eval(fun_str)

                new_file = (
                    out_dir + "sim_" + gold_base.replace(".csv", "") + "_" + str(index) + "_" + str(i) + ".csv"
                )

                utilities.write(new_file, data)
                index = index + 1
 def do_file_list(self):
     """Persist the tracked file list: one sorted path per line,
     newline-terminated, written to the file-list file."""
     mkdir_p(self.file_list_dir)
     sorted_entries = sorted(self.file_list)
     payload = '\n'.join(sorted_entries) + '\n'
     write(self.file_list_file_name(), payload)
def write_file(file_name, content):
    """Write *content* to *file_name*, ensuring a trailing newline."""
    terminated = content + '\n'
    write(file_name, terminated)