Example #1
    def set_mc_interval(self, mc_interval):
        self.y = copy.deepcopy(self.y_original)
        self.y = self.y[mc_interval[0]:mc_interval[1], :]

        self.N_configurations = self.y.shape[0]

        # Sets up variables dependent on the number of configurations again
        self.unanalyzed_y_data = np.zeros((self.NFlows, self.N_configurations))
        self.autocorrelations = np.zeros(
            (self.NFlows, self.N_configurations // 2))
        self.autocorrelations_errors = np.zeros(
            (self.NFlows, self.N_configurations // 2))

        self.mc_interval = mc_interval

        # Creates a new folder to store t0 results in
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "MCint%03d-%03d" % mc_interval)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{time interval}
        # exists.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder_old, "%03d-%03d" % self.mc_interval)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
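A standalone illustration of the half-open slicing semantics used above (not part of the source): mc_interval=(a, b) keeps configurations a through b-1.

import numpy as np

y = np.arange(10 * 4).reshape(10, 4)  # 10 configurations, 4 flow times
a, b = 2, 5                           # hypothetical mc_interval=(2, 5)
print(y[a:b, :].shape)                # (3, 4): keeps configurations 2, 3, 4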
Example #2
def heatmap_plotter(x,
                    y,
                    z,
                    figure_name,
                    tick_param_fs=None,
                    label_fs=None,
                    vmin=None,
                    vmax=None,
                    xlabel=None,
                    ylabel=None,
                    cbartitle=None,
                    x_tick_mode="int",
                    y_tick_mode="int",
                    figure_folder=""):
    """Plots a heatmap surface."""
    fig, ax = plt.subplots()

    if x_tick_mode == "exp":
        xheaders = ['%1.1e' % i for i in x]
    elif x_tick_mode == "int":
        xheaders = ['%d' % int(i) for i in x]
    elif x_tick_mode == "float":
        xheaders = ['%1.2f' % i for i in x]
    else:
        xheaders = ['%g' % i for i in x]

    if y_tick_mode == "exp":
        yheaders = ['%1.1e' % i for i in y]
    elif y_tick_mode == "int":
        yheaders = ['%d' % int(i) for i in y]
    elif y_tick_mode == "float":
        yheaders = ['%1.2f' % i for i in y]
    else:
        yheaders = ['%g' % i for i in y]

    heatmap = ax.pcolormesh(z,
                            edgecolors="k",
                            linewidth=2,
                            vmin=vmin,
                            vmax=vmax,
                            cmap="YlGnBu")
    cbar = plt.colorbar(heatmap, ax=ax)
    cbar.ax.tick_params(labelsize=tick_param_fs)
    cbar.ax.set_title(cbartitle, fontsize=label_fs)

    # # ax.set_title(method, fontsize=fs1)
    ax.set_xticks(np.arange(z.shape[1]) + .5, minor=False)
    ax.set_yticks(np.arange(z.shape[0]) + .5, minor=False)

    ax.set_xticklabels(xheaders, rotation=90, fontsize=tick_param_fs)
    ax.set_yticklabels(yheaders, fontsize=tick_param_fs)

    ax.set_xlabel(xlabel, fontsize=label_fs)
    ax.set_ylabel(ylabel, fontsize=label_fs)

    check_folder(figure_folder)
    figure_path = os.path.join(figure_folder, figure_name)
    fig.savefig(figure_path)
    print("Figure saved at {}".format(figure_path))
    plt.close(fig)
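A minimal usage sketch for heatmap_plotter with synthetic data (values and file names are hypothetical; assumes numpy as np and check_folder are available, as in the example above):

import numpy as np

x = np.array([1, 2, 4, 8])                     # e.g. processor counts
y = np.array([16, 32, 64])                     # e.g. lattice sizes
z = np.random.uniform(0, 1, (y.size, x.size))  # one value per (y, x) cell

heatmap_plotter(x, y, z, "demo_heatmap.pdf",
                xlabel=r"$N_p$", ylabel=r"$N$", cbartitle=r"$t$ [s]",
                x_tick_mode="int", y_tick_mode="int",
                figure_folder="figures")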
Example #3
    def _set_mc_interval(self, mc_interval):
        """
        Selects a Monte Carlo interval in the MC history to use based on the
        tuple provided.
        """
        if isinstance(mc_interval, types.NoneType):
            self.mc_interval = mc_interval
            return

        assert isinstance(mc_interval, (tuple, list)), \
            "invalid type: %s" % type(mc_interval)

        self.mc_interval = mc_interval

        # Sets the new y config range
        self.y = self.y[mc_interval[0]:mc_interval[1], :]

        # Updates the title name or label to include range
        self.fig_label = r"MC interval: $[%d, %d)$" % mc_interval
        # Checks and creates file folders for mc interval
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path, "MCint%03d-%03d" % mc_interval)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{mc-interval}
        # exists.
        self.post_analysis_folder = \
            os.path.join(self.post_analysis_folder,
                         "%03d-%03d" % self.mc_interval)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)
Example #4
def weak_scaling_flow():
    """
    Weak scaling analysis.
    """

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    # batch_folder = check_relative_path("data/scaling_output")
    base_figure_folder = check_relative_path("figures/")
    base_figure_folder = os.path.join(base_figure_folder, "weak_scaling")
    check_folder(base_figure_folder, verbose=verbose)
    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Build correct files list
    # weak_scaling_files = filter(
    #     lambda _f: True if "weak_scaling" in _f else False,
    #     os.listdir(batch_folder))
    with open(datapath, "r") as f:
        string_scaling_times = json.load(f)["runs"]
    string_scaling_times = filter(
        lambda _f: "weak_scaling" in _f["runname"], string_scaling_times)

    # Splits into gen, io, flow
    gen_weak_scaling = filter(
        lambda _f: "gen" in _f["runname"], string_scaling_times)
    io_weak_scaling = filter(
        lambda _f: "io" in _f["runname"], string_scaling_times)
    flow_weak_scaling = filter(
        lambda _f: "flow" in _f["runname"], string_scaling_times)
    def _get_plot_figure_name(self, output_folder=None,
                              figure_name_appendix=""):
        """Retrieves appropriate figure file name."""
        if isinstance(output_folder, types.NoneType):
            output_folder = os.path.join(self.output_folder_path, "slices")
        check_folder(output_folder, False, True)
        fname = "post_analysis_%s_%s_tf%f_mc%s%s.pdf" % (
            self.observable_name_compact, self.analysis_data_type,
            self.interval_index, self.mc_int_str, figure_name_appendix)
        return os.path.join(output_folder, fname)
def plot9_figures(t,
                  x,
                  y,
                  z,
                  z_error,
                  figure_name,
                  xlabel=None,
                  ylabel=None,
                  figure_folder="",
                  mark_interval=1):

    fig, axes = plt.subplots(3, 3, sharex=True, sharey=True)

    flip_axis = 0
    t = np.flip(t, axis=flip_axis)
    x = np.flip(x, axis=flip_axis)
    y = np.flip(y, axis=flip_axis)
    z = np.flip(z, axis=flip_axis)
    z_error = np.flip(z_error, axis=flip_axis)

    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            axes[i,
                 j].errorbar(t[i, j],
                             z[i, j],
                             yerr=z_error[i, j],
                             label=r"$N_\mathrm{corr}=%d, N_\mathrm{up}=%d$" %
                             (x[i, j], y[i, j]),
                             alpha=1.0,
                             capsize=5,
                             fmt="_",
                             markevery=mark_interval,
                             errorevery=mark_interval)

            if i == 1 and j == 0:
                axes[i, j].set_ylabel(ylabel, fontsize=12)
            if i == 2 and j == 1:
                axes[i, j].set_xlabel(xlabel, fontsize=12)

            axes[i, j].legend(fontsize=6, loc="upper left")
            axes[i, j].grid(True)

    plt.subplots_adjust(left=0.08,
                        bottom=0.1,
                        right=0.96,
                        top=0.96,
                        wspace=0.09,
                        hspace=0.1)
    check_folder(figure_folder)
    figure_path = os.path.join(figure_folder, figure_name)
    fig.savefig(figure_path)
    print("Figure saved at {}".format(figure_path))
    plt.close(fig)
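A hypothetical call of plot9_figures with synthetic 3x3 panel data (a sketch; all names and values are made up):

import numpy as np

npts = 50
t = np.tile(np.linspace(0, 10, npts), (3, 3, 1))  # abscissa per panel
x = np.arange(9).reshape(3, 3)                    # N_corr label values
y = 10 * np.arange(9).reshape(3, 3)               # N_up label values
z = np.random.normal(0, 1, (3, 3, npts)).cumsum(axis=-1)
z_error = 0.1 * np.ones((3, 3, npts))

plot9_figures(t, x, y, z, z_error, "demo_9panels.pdf",
              xlabel=r"$t$", ylabel=r"$C(t)$",
              figure_folder="figures", mark_interval=5)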
Example #7
    def plot_continuum(self, fit_target, interval_keys, **kwargs):
        """
        Continuum plotter for topsus qtq0 in fixed flow time.

        Args:
                fit_target: float value from which we extrapolate to the
                        continuum.
                interval_keys: list of str, for a given euclidean interval
                        specified via setup_interval().
                **kwargs: passed to plot_continuum().
        """

        if len(list(set(self.beta_values.values()))) != len(self.batch_names):
            print(
                "Multiple values for a beta value: {} --> Skipping"
                " continuum extrapolation".format(self.beta_values.values()))
            return

        # Backs up old variables
        self.plot_values_old = self.plot_values
        self.output_folder_path_old = self.output_folder_path

        # Sets plot values
        self._initiate_plot_values(self.data[self.analysis_data_type],
                                   self.data_raw[self.analysis_data_type],
                                   interval_keys=interval_keys)

        self._update_interval_string(interval_keys)

        self.output_folder_path = os.path.join(
            self.output_folder_path,
            "%s%s" % (self.subfolder_type, self.intervals_str_compact))
        check_folder(self.output_folder_path, self.dryrun, self.verbose)

        # Retrieves data for analysis.
        if fit_target == -1:
            fit_target = self.plot_values[max(self.plot_values)]["x"][-1]
        fit_targets = self.get_fit_targets(fit_target)
        if self.verbose:
            print "Fit targets: ", fit_targets

        self.output_folder_path = os.path.join(
            self.output_folder_path,
            "-".join([("%.2f" % _ft).replace(".", "_")
                      for _ft in fit_targets]))
        check_folder(self.output_folder_path, self.dryrun, self.verbose)

        super(MultiPlotCore, self).plot_continuum(fit_target, **kwargs)

        # Resets the plot values and output folder path
        self.plot_values = self.plot_values_old
        self.output_folder_path = self.output_folder_path_old
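For reference, the fit-target folder name built above comes out like this (standalone sketch, hypothetical targets):

fit_targets = [0.55, 0.60]
print("-".join([("%.2f" % _ft).replace(".", "_") for _ft in fit_targets]))
# prints: 0_55-0_60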
Example #8
    def setEQ0(self, t_euclidean_index):
        """
        Sets the Euclidean time we are to analyse for. E.g. if it is 0.9, it 
        will be the Q that is closest to 90% of the total flowed time.

        Args:
                t_euclidean_index: integer of what time point we will look at
        """

        # Finds the euclidean time zero index
        self.t_euclidean_index = t_euclidean_index

        self.V = self.lattice_size / float(self.NT)
        self.const = self.hbarc / self.a / self.V**0.25
        self.const_err = self.hbarc * self.a_err / self.a**2 / self.V**0.25
        self.function_derivative_parameters = \
            [{"const": self.const} for i in xrange(self.NFlows)]

        # Sets file name
        self.observable_name = r"$\chi(\langle Q_t Q_{t_{euclidean}}$"
        self.observable_name += r"$\rangle)^{1/4}$ at $i_{euclidean}=%d$" \
            % self.t_euclidean_index

        # Manual method for multiplying the matrices
        y_qe0 = copy.deepcopy(self.y_original[:, :, self.t_euclidean_index])
        self.y = copy.deepcopy(self.y_original)

        # Sums the euclidean time
        self.y = np.sum(self.y, axis=2)

        self.y *= y_qe0

        # Creates a new folder to store t0 results in
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "%04d" % self.t_euclidean_index)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{time interval}
        # exist.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder_old, "%04d" % self.t_euclidean_index)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
    def setQ0(self, q0_flow_time, y_label=None):
        """
        Sets the flow time we are to analyse for. E.g. if it is 0.9, it will 
        be the Q that is closest to 90% of the total flowed time.

        Args:
                q0_flow_time: float between 0.0 and 1.0, in which we 
                choose what percentage point of the data we set as q0.

        """

        self._set_q0_time_and_index(q0_flow_time)

        self.plot_vline_at = self.q0_flow_time

        # Sets file name
        self.observable_name = (r"$\chi(\langle Q_t Q_{t_0} \rangle)^{1/4}$"
                                " at $t=%.2f$" % (self.q0_flow_time))

        # Manual method for multiplying the matrices
        y_q0 = copy.deepcopy(self.y_original[:, self.q0_flow_time_index])
        self.y = copy.deepcopy(self.y_original)

        # Multiplying QtQ0
        for iFlow in xrange(self.y.shape[1]):
            self.y[:, iFlow] *= y_q0

        if y_label is not None:
            self.y_label = y_label

        # Creates a new folder to store t0 results in
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "%04.2f" % self.q0_flow_time)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks if {post_analysis_folder}/{observable_name}/{time interval}
        # exists.
        self.post_analysis_folder = os.path.join(self.post_analysis_folder_old,
                                                 "%04.2f" % self.q0_flow_time)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
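The explicit flow-time loop in setQ0 is equivalent to a single numpy broadcast; a standalone sketch with hypothetical shapes:

import numpy as np

y = np.random.rand(100, 25)  # (N_configurations, NFlows)
y_q0 = y[:, 10].copy()       # Q at a chosen flow-time index

y_loop = y.copy()
for iFlow in range(y.shape[1]):
    y_loop[:, iFlow] *= y_q0

assert np.allclose(y_loop, y * y_q0[:, np.newaxis])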
Example #10
    def _get_plot_figure_name(self,
                              output_folder=None,
                              figure_name_appendix=""):
        """Retrieves appropriate figure file name."""
        if isinstance(output_folder, types.NoneType):
            # Sets up slices folder containing all euclidean times
            output_folder = os.path.join(self.output_folder_path, "slices")
            check_folder(output_folder, False, True)

            # Sets up euclidean time folder
            output_folder = os.path.join(
                output_folder, "te%04d" % (int(100 * self.interval_index[1])))

        check_folder(output_folder, False, True)

        fname = "post_analysis_%s_%s_tf%s%s.pdf" % (
            self.observable_name_compact, self.analysis_data_type,
            str("%4.4f" % self.interval_index[0]).replace(
                ".", "_"), figure_name_appendix)
        return os.path.join(output_folder, fname)
Example #11
    def setEQ0(self, t_euclidean_index):
        """
        Sets the Euclidean time we are to analyse for. Q_{t_E=}
        q_flow_time_zero_percent: float between 0.0 and 1.0, in which we 
        choose what percentage point of the data we set as q0.

        E.g. if it is 0.9, it will be the Q that is closest to 90% of the 
        whole flowed time.

        Args:
                t_euclidean_index: integer of what time point we will look at
        """
        # Finds the euclidean time zero index
        self.t_euclidean_index = t_euclidean_index

        # Sets file name
        self.observable_name = (r"$Q_{t_{euclidean}}$ at $i_{euclidean}=%d$" %
                                self.t_euclidean_index)

        # Manual method for multiplying the matrices
        self.y = copy.deepcopy(self.y_original[:, :, self.t_euclidean_index])

        # Creates a new folder to store t0 results in
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "%04d" % self.t_euclidean_index)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{time interval}
        # exist.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder_old, "%04d" % self.t_euclidean_index)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
    def set_t_interval(self, t_interval):
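        """Restricts the Euclidean time axis to the given t_interval tuple."""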
        self.y = copy.deepcopy(self.y_original)
        self.y = self.y[:, :, t_interval[0]:t_interval[1]]
        self.t_interval = t_interval

        # Creates a new folder to store t0 results in
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "int%03d-%03d" % t_interval)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{time interval}
        # exist.
        self.post_analysis_folder = os.path.join(self.post_analysis_folder_old,
                                                 "%03d-%03d" % self.t_interval)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
Example #13
    def plot_continuum(self, fit_target, interval_keys, **kwargs):
        """
		Continuum plotter for topsus qtq0 in fixed flow time.

		Args:
			fit_target: float value at which we extrapolate to continuum from.
			interval_keys: list of str, for a given interval euclidean 
				specified from setup_interval().
			**kwargs: passed to plot_continuum().
		"""

        # Backs up old variables
        self.plot_values_old = self.plot_values
        self.output_folder_path_old = self.output_folder_path

        # Sets plot values
        self._initiate_plot_values(self.data[self.analysis_data_type],
                                   self.data_raw[self.analysis_data_type],
                                   interval_keys=interval_keys)

        self._update_interval_string(interval_keys)

        self.output_folder_path = os.path.join(
            self.output_folder_path,
            "%s%s" % (self.subfolder_type, self.intervals_str_compact))
        check_folder(self.output_folder_path, self.dryrun, self.verbose)

        if fit_target == -1:
            fit_target = self.plot_values[max(self.plot_values)]["x"][-1]
        self.output_folder_path = os.path.join(self.output_folder_path,
                                               "%02.2f" % fit_target)
        check_folder(self.output_folder_path, self.dryrun, self.verbose)

        super(MultiPlotCore, self).plot_continuum(fit_target, **kwargs)

        # Resets the plot values and output folder path
        self.plot_values = self.plot_values_old
        self.output_folder_path = self.output_folder_path_old
Example #14
    def _series_plot_core(self,
                          indexes,
                          beta="all",
                          x_limits=False,
                          y_limits=False,
                          plot_with_formula=False,
                          error_shape="band",
                          fname=None):
        """
		Core structure of the series plot, allows to easily be expanded upon 
		by the needs of the different observables.

		Args:
			indexes: list containing integers of which intervals to plot 
				together.
			beta: beta values to plot. Default is "all". Otherwise, 
				a list of numbers or a single beta value is provided.
			x_limits: limits of the x-axis. Default is False.
			y_limits: limits of the y-axis. Default is False.
			plot_with_formula: bool, default is false, is True will look for 
				formula for the y-value to plot in title.
			error_shape: plot with error bands or with error bars. 
				Options: band, bars
			fname: str, figure name. Default is 
				post_analysis_{obs_name}_{analysis_type}.png
		"""

        old_rc_paramx = plt.rcParams['xtick.labelsize']
        old_rc_paramy = plt.rcParams['ytick.labelsize']
        plt.rcParams['xtick.labelsize'] = 6
        plt.rcParams['ytick.labelsize'] = 6

        # Starts plotting
        fig, axes = plt.subplots(2, 2, sharey=True, sharex=True)

        # Ensures beta is a list
        if not isinstance(beta, list):
            beta = [beta]

        # Sets the beta values to plot
        if beta[0] == "all" and len(beta) == 1:
            beta_values = self.plot_values
        else:
            beta_values = beta

        # Checks that we actually have enough different data points to plot
        comparer = lambda b, ind: len(self.plot_values[b]) > max(ind)
        asrt_msg = ("Need at least %d different values. Currently have %d: %s"
                    % (max(indexes), len(self.plot_values.values()[0]),
                       ", ".join(self.plot_values.values()[0].keys())))
        if not np.all([comparer(b, indexes) for b in beta_values]):
            print "WARNING:", asrt_msg
            return

        for ax, i in zip(list(itertools.chain(*axes)), indexes):
            for ibeta in sorted(beta_values):
                # Retrieves the values depending on the provided indexes
                # and beta values.
                value = self.plot_values[ibeta][
                    sorted(self.observable_intervals[ibeta])[i]]

                # Retrieves values to plot
                x = value["x"]
                y = value["y"]
                y_err = value["y_err"]

                if error_shape == "band":
                    ax.plot(x,
                            y,
                            "-",
                            label=value["label"],
                            color=self.colors[ibeta])
                    ax.fill_between(x,
                                    y - y_err,
                                    y + y_err,
                                    alpha=0.5,
                                    edgecolor='',
                                    facecolor=self.colors[ibeta])
                elif error_shape == "bars":
                    ax.errorbar(x,
                                y,
                                yerr=y_err,
                                capsize=5,
                                fmt="_",
                                ls=":",
                                label=value["label"],
                                color=self.colors[ibeta],
                                ecolor=self.colors[ibeta])
                else:
                    raise KeyError("%s is not a valid error bar shape." %
                                   error_shape)

                # Basic plotting commands
                ax.grid(True)
                ax.legend(loc="best", prop={"size": 5})

                # Sets axes limits if provided
                if x_limits != False:
                    ax.set_xlim(x_limits)
                if y_limits != False:
                    ax.set_ylim(y_limits)

        # Set common labels
        # https://stackoverflow.com/questions/6963035/pyplot-axes-labels-for-subplots
        fig.text(0.52,
                 0.035,
                 self.x_label,
                 ha='center',
                 va='center',
                 fontsize=9)
        fig.text(0.03,
                 0.5,
                 self.y_label,
                 ha='center',
                 va='center',
                 rotation='vertical',
                 fontsize=11)

        # Sets the title string
        # title_string = r"%s" % self.observable_name
        # if plot_with_formula:
        # 	title_string += r" %s" % self.formula
        # plt.suptitle(title_string)
        # plt.tight_layout(pad=1.7)

        # Saves and closes figure
        if beta == "all":
            folder_name = "beta%s" % beta
        else:
            folder_name = "beta%s" % "-".join([str(i) for i in beta])
        folder_name += "_N%s" % "".join([str(i) for i in indexes])
        folder_path = os.path.join(self.output_folder_path, folder_name)
        check_folder(folder_path, False, True)

        if isinstance(fname, types.NoneType):
            fpath = os.path.join(
                folder_path, "post_analysis_%s_%s.png" %
                (self.observable_name_compact, self.analysis_data_type))
        else:
            fpath = os.path.join(folder_path, fname)

        plt.savefig(fpath, dpi=self.dpi)
        if self.verbose:
            print "Figure saved in %s" % fpath
        # plt.show()
        plt.close(fig)

        plt.rcParams['xtick.labelsize'] = old_rc_paramx
        plt.rcParams['ytick.labelsize'] = old_rc_paramy
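The manual save/restore of rcParams in _series_plot_core can also be written with matplotlib's rc_context manager, which restores the old values even if an exception is raised; a sketch:

import matplotlib.pyplot as plt

# Equivalent, exception-safe alternative to saving and restoring the
# tick label sizes by hand:
with plt.rc_context({"xtick.labelsize": 6, "ytick.labelsize": 6}):
    fig, axes = plt.subplots(2, 2, sharey=True, sharex=True)
    # ... plot as in _series_plot_core ...
    plt.close(fig)
# rcParams are restored automatically here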
Example #15
    def set_time(self, q0_flow_time, fold=True):
        """
		Function for setting the flow time we will plot in euclidean time.

		Args:
			q0_flow_time: float, flow time t0 at where to select q0.
		"""

        self.x = self.x_old
        self._set_q0_time_and_index(q0_flow_time)

        # Restores y from original data
        self.y = copy.deepcopy(self.y_original)
        self.y = self.y[:, self.q0_flow_time_index, :]

        # Sets the number of flows as the number of euclidean time slices,
        # as that is now what we are plotting in.
        assert self.y.shape[1] == self.NT, "the second axis does not match NT."
        self.NFlows = self.NT

        # Sets file name
        self.observable_name = r"$t_{f}=%.2f$" % (self.q0_flow_time)

        # Sets a new x-axis
        self.x = np.linspace(0, self.NFlows - 1, self.NFlows)

        # Multiplies by Q0 to get the correlator
        y_e0 = copy.deepcopy(self.y_original[:, self.q0_flow_time_index, 0])

        for iteuclidean in xrange(self.NFlows):
            # self.y[:,iteuclidean] = np.roll(self.y[:,iteuclidean], -1, axis=1)
            # self.y[:,iteuclidean] *= y_e0
            self.y[:, iteuclidean] = self.y[:, iteuclidean] * y_e0

        # # Folds; folding is now done in post analysis.
        # if fold:
        # 	last_half = np.flip(self.y[:, self.NT/2:], axis=1)
        # 	first_half = self.y[:, :self.NT/2]
        # 	print self.y.shape, first_half.shape, last_half.shape,
        # 	self.y = np.concatenate((first_half, last_half), axis=0)

        # The shapes are unchanged by the multiplication, so this only needs
        # to be set once, outside the loop.
        self.N_configurations, self.NT = self.y.shape

        # self.y = np.log(self.y/np.roll(self.y, -1, axis=1)) # C(t)/C(t+1)

        # Sets up variables dependent on the number of configurations again
        self.unanalyzed_y = np.zeros(self.NFlows)
        self.unanalyzed_y_std = np.zeros(self.NFlows)
        self.unanalyzed_y_data = np.zeros((self.NFlows, self.N_configurations))

        # Resets bootstrap arrays
        self.bs_y = np.zeros(self.NFlows)
        self.bs_y_std = np.zeros(self.NFlows)

        # Resets jackknifed arrays
        self.jk_y = np.zeros(self.NFlows)
        self.jk_y_std = np.zeros(self.NFlows)

        # Resets autocorrelation arrays
        self.autocorrelations = np.zeros(
            (self.NFlows, self.N_configurations // 2))
        self.autocorrelations_errors = np.zeros(
            (self.NFlows, self.N_configurations // 2))
        self.integrated_autocorrelation_time = np.ones(self.NFlows)
        self.integrated_autocorrelation_time_error = np.zeros(self.NFlows)
        self.autocorrelation_error_correction = np.ones(self.NFlows)

        # Creates a new folder to store results in {beta}/{observable_name}/
        # {flow time} exist.
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path_old,
            "tflow%04.4f" % self.q0_flow_time)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{flow time}
        # exist.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder_old, "tflow%04.4f" % self.q0_flow_time)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
Example #16
    def __init__(self,
                 data,
                 mc_interval=None,
                 figures_folder=False,
                 parallel=False,
                 numprocs=4,
                 dryrun=False,
                 verbose=False):
        """
        Parent class for analyzing flowed observables.

        Args:
                data: DataReader([observable_name]), a DataReader object
                        called with the compact observable name. Options:
                        "plaq", "energy", "topc".
                mc_interval: optional, tuple, will only look at the Monte
                        Carlo history inside the interval. Default is to use
                        the entire Monte Carlo history available.
                figures_folder: optional argument for where to place the
                        figures created. Default is "../figures".
                parallel: optional argument for whether to run the analysis
                        in parallel. Default is False.
                numprocs: optional argument for the number of processors to
                        use. Default is 4.
                dryrun: optional dryrun mode. Default is False.
                verbose: optional argument for a more verbose run. Default is
                        False.

        Returns:
                Object for analyzing flow.
        """
        # Retrieves data from data
        self.batch_name = data["batch_name"]
        self.batch_data_folder = data["batch_data_folder"]
        self.x = data["t"]
        self.y = data["obs"]
        self.flow_epsilon = data["FlowEpsilon"]

        # Sets lattice parameters
        self.beta = data["beta"]
        self.a, self.a_err = get_lattice_spacing(self.beta)
        self.r0 = 0.5  # Sommer parameter

        # Sets the lattice sizes if one is provided
        self.lattice_size = data["lattice_size"]

        # Initializes global constants
        self.N_bs = None
        self.dryrun = dryrun
        self.verbose = verbose
        if figures_folder != False:  # Default is just figures
            self.figures_folder = figures_folder

        # Parallel variables
        self.parallel = parallel
        self.numprocs = numprocs

        # Checks that a figures folder exists
        check_folder(self.figures_folder, self.dryrun, verbose=self.verbose)

        # Checks that a data run folder exists, so that analyses performed
        # on different data sets do not mix
        self.data_batch_folder_path = \
            os.path.join(self.figures_folder,
                         os.path.split(self.batch_data_folder)[-1])
        check_folder(self.data_batch_folder_path,
                     self.dryrun,
                     verbose=self.verbose)

        # Checks that a batch folder exists
        self.batch_name_folder_path = os.path.join(self.data_batch_folder_path,
                                                   self.batch_name)
        check_folder(self.batch_name_folder_path,
                     self.dryrun,
                     verbose=self.verbose)

        # Checks that the observable output folder exists, creating it if not
        self.observable_output_folder_path = os.path.join(
            self.batch_name_folder_path, self.observable_name_compact)
        check_folder(self.observable_output_folder_path,
                     self.dryrun,
                     verbose=self.verbose)

        # Sets up the post analysis folder, but does not create it until
        # it is needed.
        self.post_analysis_folder_base = os.path.join(self.batch_data_folder,
                                                      self.batch_name,
                                                      "post_analysis_data")
        check_folder(self.post_analysis_folder_base,
                     self.dryrun,
                     verbose=self.verbose)

        # Checks that {post_analysis_folder}/{observable_name} exists
        self.post_analysis_folder = \
            os.path.join(self.post_analysis_folder_base,
                         self.observable_name_compact)
        check_folder(self.post_analysis_folder,
                     self.dryrun,
                     verbose=self.verbose)

        # Sets the MC interval
        self._set_mc_interval(mc_interval)

        # Makes a backup, for later use
        self.post_analysis_folder_old = self.post_analysis_folder

        # Checks if we already have scaled the x values or not
        # print(np.all(np.abs(np.diff(self.x) - self.flow_epsilon) > 1e-14))
        if np.all(np.abs(np.diff(self.x) - self.flow_epsilon) > 1e-14):
            self.x = self.x * self.flow_epsilon
            self.pre_scale = False
        else:
            self.pre_scale = True

        # Max plotting window variables
        self.y_limits = [None, None]

        # Default type of observables, one per configuration per flow
        self.N_configurations, self.NFlows = self.y.shape[:2]

        self._analysis_arrays_setup()
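An illustration of the pre-scale check at the end of __init__ (standalone sketch, hypothetical values): raw integer flow steps still need to be multiplied by the flow epsilon, while pre-scaled values are left alone.

import numpy as np

flow_epsilon = 0.01
x_steps = np.arange(100)            # raw integer flow steps
x_scaled = x_steps * flow_epsilon   # already in flow-time units

print(np.all(np.abs(np.diff(x_steps) - flow_epsilon) > 1e-14))   # True -> scale
print(np.all(np.abs(np.diff(x_scaled) - flow_epsilon) > 1e-14))  # False -> pre-scaled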
Example #17
def topc_modes_analysis():
    """Analysis for different lattice sizes and their topological charges."""
    default_params = get_default_parameters(data_batch_folder="temp")
    default_params["blocking_analysis"] = True

    default_params["observables"] = ["plaq", "topc", "topc2", "topc4", "topcr",
                                     "topsus", "topsusqtq0", "qtq0e",
                                     "qtq0eff", "topcMC"]
    default_params["observables"] = ["topc2", "topc4", "topcr"]

    # Check to only generate data for post-analysis
    default_params["only_generate_data"] = False

    run_pre_analysis = False
    run_post_analysis = False

    # run_pre_analysis = True
    # run_post_analysis = True
    default_params["verbose"] = True

    ########## Post analysis parameters ##########
    line_fit_interval_points = 20
    topsus_fit_targets = [0.5, 0.6]
    energy_fit_target = 0.3
    q0_flow_times = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]  # [fermi]
    euclidean_time_percents = [0, 0.25, 0.50, 0.75, 1.00]
    extrapolation_methods = ["bootstrap"]
    plot_continuum_fit = False
    post_analysis_data_type = ["bootstrap"]
    figures_folder = "figures/topc_modes_analysis"

    data_path = "../data/"
    if not os.path.isdir(data_path):
        data_path = "../" + data_path

    ########## Smaug data 8x16 analysis ##########
    smaug8x16_data_beta60_analysis = copy.deepcopy(default_params)
    smaug8x16_data_beta60_analysis["batch_folder"] = data_path
    smaug8x16_data_beta60_analysis["batch_name"] = "beta60_8x16_run"
    smaug8x16_data_beta60_analysis["ensemble_name"] = r"$E$"
    smaug8x16_data_beta60_analysis["beta"] = 6.0
    smaug8x16_data_beta60_analysis["block_size"] = 10  # None
    smaug8x16_data_beta60_analysis["topc_y_limits"] = [-2, 2]
    smaug8x16_data_beta60_analysis["num_bins_per_int"] = 32
    smaug8x16_data_beta60_analysis["bin_range"] = [-2.5, 2.5]
    smaug8x16_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug8x16_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug8x16_data_beta60_analysis["batch_folder"],
        smaug8x16_data_beta60_analysis["batch_name"])
    smaug8x16_data_beta60_analysis["obs_file"] = "8_6.00"
    smaug8x16_data_beta60_analysis["N"] = 8
    smaug8x16_data_beta60_analysis["NT"] = 16
    smaug8x16_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 12x24 analysis ##########
    smaug12x24_data_beta60_analysis = copy.deepcopy(default_params)
    smaug12x24_data_beta60_analysis["batch_folder"] = data_path
    smaug12x24_data_beta60_analysis["batch_name"] = "beta60_12x24_run"
    smaug12x24_data_beta60_analysis["ensemble_name"] = r"$F$"
    smaug12x24_data_beta60_analysis["beta"] = 6.0
    smaug12x24_data_beta60_analysis["block_size"] = 10  # None
    smaug12x24_data_beta60_analysis["topc_y_limits"] = [-4, 4]
    smaug12x24_data_beta60_analysis["num_bins_per_int"] = 16
    smaug12x24_data_beta60_analysis["bin_range"] = [-4.5, 4.5]
    smaug12x24_data_beta60_analysis["hist_flow_times"] = [0, 100, 600]
    smaug12x24_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug12x24_data_beta60_analysis["batch_folder"],
        smaug12x24_data_beta60_analysis["batch_name"])
    smaug12x24_data_beta60_analysis["obs_file"] = "12_6.00"
    smaug12x24_data_beta60_analysis["N"] = 12
    smaug12x24_data_beta60_analysis["NT"] = 24
    smaug12x24_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 16x32 analysis ##########
    smaug16x32_data_beta61_analysis = copy.deepcopy(default_params)
    smaug16x32_data_beta61_analysis["batch_folder"] = data_path
    smaug16x32_data_beta61_analysis["batch_name"] = "beta61_16x32_run"
    smaug16x32_data_beta61_analysis["ensemble_name"] = r"$G$"
    smaug16x32_data_beta61_analysis["beta"] = 6.1
    smaug16x32_data_beta61_analysis["block_size"] = 10  # None
    smaug16x32_data_beta61_analysis["topc_y_limits"] = [-8, 8]
    smaug16x32_data_beta61_analysis["num_bins_per_int"] = 16
    smaug16x32_data_beta61_analysis["bin_range"] = [-7.5, 7.5]
    smaug16x32_data_beta61_analysis["hist_flow_times"] = [0, 100, 400]
    smaug16x32_data_beta61_analysis["NCfgs"] = get_num_observables(
        smaug16x32_data_beta61_analysis["batch_folder"],
        smaug16x32_data_beta61_analysis["batch_name"])
    smaug16x32_data_beta61_analysis["obs_file"] = "16_6.10"
    smaug16x32_data_beta61_analysis["N"] = 16
    smaug16x32_data_beta61_analysis["NT"] = 32
    smaug16x32_data_beta61_analysis["color"] = "#377eb8"

    param_list = [
        smaug8x16_data_beta60_analysis,
        smaug12x24_data_beta60_analysis,
        smaug16x32_data_beta61_analysis]

    if run_pre_analysis:
        # Submitting analysis
        for analysis_parameters in param_list:
            pre_analysis(analysis_parameters)

    # Submitting post-analysis data
    if run_post_analysis:
        if len(param_list) >= 3:
            post_analysis(param_list,
                          default_params["observables"],
                          topsus_fit_targets, line_fit_interval_points,
                          energy_fit_target,
                          q0_flow_times, euclidean_time_percents,
                          extrapolation_methods=extrapolation_methods,
                          plot_continuum_fit=plot_continuum_fit,
                          post_analysis_data_type=post_analysis_data_type,
                          figures_folder=figures_folder, # "figures/topc_modes_analysis"
                          verbose=default_params["verbose"])
        else:
            msg = "Need at least 3 different beta values to run post analysis"
            msg += "(%d given)." % len(analysis_parameter_list)
            print msg

    # Loads topc data
    data = []
    # N_val = [24, 24, 28]
    for i, param in enumerate(param_list):
        print "Loading data for: {}".format(param["batch_name"])
        data_, p = get_data_parameters(param)
        data.append({"data": data_("topc")["obs"].T,
                     "beta": param["beta"],
                     "N": param["N"],
                     "ensemble_name": param["ensemble_name"]})

    # Flow time to plots
    flow_times = [0, 25, 50, 100, 150, 250, 450, 600]

    # Histogram plotting
    xlim = 7.5
    NBins = np.arange(-xlim, xlim, 0.05)
    for t_f in flow_times:
        # Adds unanalyzed data
        fig, axes = plt.subplots(len(param_list), 1,
                                 sharey=False, sharex=True)
        axes = np.atleast_1d(axes)
        for i, ax in enumerate(axes):
            # lab = r"${0:d}^3\times{1:d}$, $\beta={2:.2f}$".format(
            #     data[i]["N"], data[i]["N"]*2, data[i]["beta"])
            lab = data[i]["ensemble_name"]

            weights = np.ones_like(data[i]["data"][t_f])
            weights /= len(data[i]["data"][t_f])
            ax.hist(data[i]["data"][t_f], bins=NBins,
                    label=lab, weights=weights)
            ax.legend(loc="upper right")
            ax.grid(True)
            ax.set_xlim(-xlim, xlim)

            if i == 1:
                ax.set_ylabel(r"Hits(normalized)")
            elif i == 2:
                ax.set_xlabel(r"$Q$")

        # Sets up figure
        figpath = figures_folder
        if not os.path.isdir(figpath):
            figpath = "../" + figpath
        check_folder(figpath, verbose=default_params["verbose"])
        figpath = os.path.join(figpath, "topc_modes_tf{}.pdf".format(t_f))
        fig.savefig(figpath)
        print "Figure saved at {0:s}".format(figpath)
        plt.close(fig)
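The weights trick in the histogram loop above normalizes each histogram so the bar heights sum to one, independent of sample size; a standalone sketch:

import numpy as np
import matplotlib.pyplot as plt

samples = np.random.normal(0, 1, 500)
weights = np.ones_like(samples) / len(samples)

counts, _, _ = plt.hist(samples, bins=50, weights=weights)
print(abs(counts.sum() - 1.0) < 1e-12)  # True: bar heights sum to one
plt.close("all")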
Example #18
    def __init__(self,
                 data,
                 with_autocorr=True,
                 figures_folder="../figures",
                 verbose=False,
                 dryrun=False):
        """
		Base class for analysing beta values together after initial analysis.

		Args:
			data: PostAnalysisDataReader object, contains all of the 
				observable data.
			with_autocorr: bool, optional. Will perform analysis on data
				corrected by autocorrelation sqrt(2*tau_int). Default is True.
			figures_folder: str, optional. Default output folder is ../figures.
			verbose: bool, optional. A more verbose output. Default is False.
			dryrun: bool, optional. No major changes will be performed. 
				Default is False.
		"""

        if with_autocorr:
            self.ac = "with_autocorr"
        else:
            self.ac = "without_autocorr"

        self.with_autocorr = with_autocorr
        self.reference_values = data.reference_values
        observable = self.observable_name_compact

        self.verbose = verbose
        self.dryrun = dryrun

        self.beta_values = sorted(data.beta_values)
        self.colors = data.colors
        self.lattice_sizes = data.lattice_sizes
        self.size_labels = data.labels
        self._setup_analysis_types(data.analysis_types)
        self.print_latex = data.print_latex

        self.data = {atype: {beta: {} for beta in self.beta_values}
                     for atype in self.analysis_types}

        self.flow_epsilon = {b: data.flow_epsilon[b] for b in self.beta_values}

        # Only sets this variable if we have sub-intervals in order to avoid bugs.
        if self.sub_obs:
            self.observable_intervals = {b: {} for b in self.beta_values}
            # for beta in self.beta_values:
            # 	self.observable_intervals[beta] = {}

        # Checks that the observable is among the available data
        assert_msg = ("%s is not among current data(%s). Have the pre analysis"
                      " been performed?" %
                      (observable, ", ".join(data.observable_list)))
        assert observable in data.observable_list, assert_msg

        for atype in self.analysis_types:
            for beta in self.beta_values:
                if self.sub_obs:
                    if self.sub_sub_obs:
                        for subobs in data.data_observables[observable][beta]:
                            # Sets sub-sub intervals
                            self.observable_intervals[beta][subobs] = \
                             data.data_observables[observable] \
                             [beta][subobs].keys()

                            # Sets up additional subsub-dictionaries
                            self.data[atype][beta][subobs] = {}

                            for subsubobs in data.data_observables \
                             [observable][beta][subobs]:

                                self.data[atype][beta][subobs][subsubobs] = \
                                 data.data_observables[observable][beta] \
                                 [subobs][subsubobs][self.ac][atype]

                                if self.with_autocorr:
                                    self.data[atype][beta][subobs][subsubobs] \
                                     ["ac"] = data.data_observables \
                                     [observable][beta][subobs][subsubobs] \
                                     ["with_autocorr"]["autocorr"]

                    else:
                        # Fills up observable intervals
                        self.observable_intervals[beta] = \
                         data.data_observables[observable][beta].keys()

                        for subobs in data.data_observables[observable][beta]:

                            self.data[atype][beta][subobs] = \
                             data.data_observables[observable][beta] \
                             [subobs][self.ac][atype]

                            if self.with_autocorr:
                                self.data[atype][beta][subobs]["ac"] = \
                                 data.data_observables[observable][beta] \
                                 [subobs]["with_autocorr"]["autocorr"]

                else:
                    self.data[atype][beta] = data.data_observables \
                     [observable][beta][self.ac][atype]

                    if self.with_autocorr:
                        self.data[atype][beta]["ac"] = \
                         data.data_observables[observable][beta] \
                         ["with_autocorr"]["autocorr"]

        self.data_raw = {}
        for atype in data.raw_analysis:
            if atype == "autocorrelation":
                self.ac_raw = data.raw_analysis[atype]
            else:
                self.data_raw[atype] = data.raw_analysis[atype]

        # Small test to ensure that the number of bootstraps matches across
        # the different beta batches
        err_msg = ("Number of bootstraps does not match across the "
                   "different beta values")
        _nboots = [get_NBoots(self.data_raw["bootstrap"][i])
                   for i in self.data_raw["bootstrap"].keys()]
        assert len(set(_nboots)) == 1, err_msg

        self.NBoots = get_NBoots(self.data_raw["bootstrap"])

        # Creates base output folder for post analysis figures
        self.figures_folder = figures_folder
        check_folder(self.figures_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)
        check_folder(os.path.join(self.figures_folder, data.batch_name),
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates output folder
        self.post_anlaysis_folder = os.path.join(self.figures_folder,
                                                 data.batch_name,
                                                 "post_analysis")
        check_folder(self.post_anlaysis_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates observable output folder
        self.output_folder_path = os.path.join(self.post_anlaysis_folder,
                                               self.observable_name_compact)
        check_folder(self.output_folder_path,
                     dryrun=self.dryrun,
                     verbose=self.verbose)
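For orientation, the nested layout this constructor builds looks roughly as follows (a sketch with hypothetical values; the key names follow the code above):

# self.data[analysis_type][beta]            -> analysis results
# self.data[analysis_type][beta]["ac"]      -> autocorrelation data
# self.data[analysis_type][beta][subobs][subsubobs] with sub-observables
data = {"bootstrap": {6.0: {"y": [0.1, 0.2],
                            "ac": {"tau_int": [1.5, 1.7]}}}}
print(data["bootstrap"][6.0]["ac"]["tau_int"][0])  # 1.5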
Example #19
    def __init__(self,
                 data,
                 with_autocorr=True,
                 figures_folder="../figures",
                 verbose=False,
                 dryrun=False):
        """
        Initializes this specialized form of finding the ratio of different
        topological charge definitions.
        """
        if with_autocorr:
            self.ac = "with_autocorr"
        else:
            self.ac = "without_autocorr"
        self.with_autocorr = with_autocorr
        observable = self.observable_name_compact

        self.verbose = verbose
        self.dryrun = dryrun

        self.beta_values = data.beta_values
        self.sorted_batch_names = data.batch_names
        self.ensemble_names = data.ensemble_names
        self.colors = data.colors
        self.lattice_sizes = data.lattice_sizes
        self.lattice_volumes = data.lattice_volumes
        self.size_labels = data.labels
        self.reference_values = data.reference_values
        self._setup_analysis_types(data.analysis_types)

        self.data = {
            atype: {b: {}
                    for b in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        self.data_map = {
            bn: {
                "lattice_volume": self.lattice_volumes[bn],
                "beta": self.beta_values[bn]
            }
            for bn in self.sorted_batch_names
        }

        self.flow_epsilon = {
            bn: data.flow_epsilon[bn]
            for bn in self.sorted_batch_names
        }

        self.sorted_batch_names = sorted(
            self.sorted_batch_names,
            key=lambda _k:
            (self.data_map[_k]["beta"], self.data_map[_k]["lattice_volume"]))

        # Q^2
        self.topc2 = {
            atype: {bn: {}
                    for bn in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        # Q^4
        self.topc4 = {
            atype: {bn: {}
                    for bn in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        # Q^4_C
        self.topc4C = {
            atype: {bn: {}
                    for bn in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        # R = Q^4_C / Q^2
        self.topcR = {
            atype: {bn: {}
                    for bn in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        # Data will be copied from R
        self.data = {
            atype: {bn: {}
                    for bn in self.sorted_batch_names}
            for atype in self.analysis_types
        }

        # Q^2 and Q^4 raw bs values
        self.topc2_raw = {}
        self.topc4_raw = {}
        self.topc4c_raw = {}
        self.topcR_raw = {}
        self.data_raw = {}
        self.ac_raw = {}

        for atype in data.raw_analysis:
            if atype == "autocorrelation":
                self.ac_raw["tau"] = data.raw_analysis[atype]
            elif atype == "autocorrelation_raw":
                self.ac_raw["ac_raw"] = data.raw_analysis[atype]
            elif atype == "autocorrelation_raw_error":
                self.ac_raw["ac_raw_error"] = data.raw_analysis[atype]
            else:
                self.data_raw[atype] = data.raw_analysis[atype]

        # First, gets the topc2, then topc4
        for atype in self.analysis_types:
            for bn in self.sorted_batch_names:
                # Q^2
                self.topc2[atype][bn] = data.data_observables["topc2"][bn][
                    self.ac][atype]

                # Q^4
                self.topc4[atype][bn] = data.data_observables["topc4"][bn][
                    self.ac][atype]

                if self.with_autocorr:
                    self.topc2[atype][bn]["ac"] = \
                        data.data_observables["topc2"][bn]["with_autocorr"]["autocorr"]

                    self.topc4[atype][bn]["ac"] = \
                        data.data_observables["topc4"][bn]["with_autocorr"]["autocorr"]

        # Creates base output folder for post analysis figures
        self.figures_folder = figures_folder
        check_folder(self.figures_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)
        check_folder(os.path.join(self.figures_folder, data.batch_name),
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates output folder
        self.post_anlaysis_folder = os.path.join(self.figures_folder,
                                                 data.batch_name,
                                                 "post_analysis")
        check_folder(self.post_anlaysis_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates observable output folder
        self.output_folder_path = os.path.join(self.post_anlaysis_folder,
                                               self.observable_name_compact)
        check_folder(self.output_folder_path,
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        self._setup_article_values()
        self._normalize_article_values()

        self._setup_volumes()
        self._normalize_Q()
        self._calculate_Q4C()
        self._calculate_R()
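A plausible reading of _calculate_Q4C and _calculate_R, assuming the standard connected fourth moment Q^4_C = <Q^4> - 3<Q^2>^2 (an assumption; the source only shows the comments "Q^4_C" and "R = Q^4_C / Q^2"):

import numpy as np

Q = np.random.normal(0, 1, 1000)  # hypothetical topological charge samples
Q2 = np.mean(Q**2)
Q4 = np.mean(Q**4)
Q4C = Q4 - 3.0 * Q2**2            # connected fourth moment (assumed definition)
R = Q4C / Q2
print(R)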
Example #20
def scaling_analysis():
    """
    Scaling analysis.
    """

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    json_file = "run_times_tmp.json"
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "data/scaling_output/"), json_file)
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "LatticeAnalyser"), json_file)

    slurm_output_folder = check_relative_path("../data/scaling_output")

    slurm_json_output_path = os.path.split(datapath)[0]
    slurm_json_output_path = os.path.join(slurm_json_output_path,
                                          "slurm_output_data.json")

    # Comment this out to use old file
    if os.path.isfile(slurm_json_output_path):
        datapath = slurm_json_output_path

    # Extract times from slurm files and put into json file
    if not os.path.isfile(datapath):
        print "No {} found. Loading slurm data.".format(json_file)
        load_slurm_folder(slurm_output_folder, slurm_json_output_path)
        datapath = slurm_json_output_path

    # Basic figure setup
    base_figure_folder = check_relative_path("figures")
    base_figure_folder = os.path.join(base_figure_folder, "scaling")
    check_folder(base_figure_folder, verbose=verbose)

    # Strong scaling folder setup
    strong_scaling_figure_folder = os.path.join(base_figure_folder, "strong")
    check_folder(strong_scaling_figure_folder, verbose=verbose)

    # Weak scaling folder setup
    weak_scaling_figure_folder = os.path.join(base_figure_folder, "weak")
    check_folder(weak_scaling_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Loads scaling times and splits into weak and strong
    with open(datapath, "r") as f:
        scaling_times = json.load(f)["runs"]
    strong_scaling_times = filter_scalings(scaling_times, "strong_scaling_np")
    weak_scaling_times = filter_scalings(scaling_times, "weak_scaling_np")

    # Splits strong scaling into gen, io, flow
    gen_strong_scaling = filter_scalings(strong_scaling_times, "gen")
    io_strong_scaling = filter_scalings(strong_scaling_times, "io")
    flow_strong_scaling = filter_scalings(strong_scaling_times, "flow")

    # Splits weak scaling into gen, io, flow
    gen_weak_scaling = filter_scalings(weak_scaling_times, "gen")
    gen_weak_scaling = filter_duplicates(gen_weak_scaling)
    io_weak_scaling = filter_scalings(weak_scaling_times, "io")
    flow_weak_scaling = filter_scalings(weak_scaling_times, "flow")

    # Adds number of processors to strong scaling
    gen_strong_scaling = add_numprocs(gen_strong_scaling)
    io_strong_scaling = add_numprocs(io_strong_scaling)
    flow_strong_scaling = add_numprocs(flow_strong_scaling)

    # Adds number of processors to weak scaling
    gen_weak_scaling = add_numprocs(gen_weak_scaling)
    io_weak_scaling = add_numprocs(io_weak_scaling)
    flow_weak_scaling = add_numprocs(flow_weak_scaling)

    scalings = [
        gen_strong_scaling, io_strong_scaling, flow_strong_scaling,
        gen_weak_scaling, io_weak_scaling, flow_weak_scaling
    ]

    times_to_scan = ["update_time", "time"]
    times_to_scan = ["time"]

    # For speedup and retrieving parallelizability fraction.
    min_procs = 8

    strong_scaling_list = []
    weak_scaling_list = []

    for time_type in times_to_scan:

        # Loops over scaling values in scalings
        for sv in scalings:
            x = [i["NP"] for i in sv]
            y = [i[time_type] for i in sv]

            # Sets up filename and folder name
            _scaling = list(set([i["runname"].split("_")[0] for i in sv]))
            _sc_part = list(set([i["runname"].split("_")[-1] for i in sv]))
            assert len(_scaling) == 1, \
                "incorrect sv type list: {}".format(_scaling)
            assert len(_sc_part) == 1, \
                "incorrect sv part list length: {}".format(_sc_part)
            _sc_part = _sc_part[0]
            _scaling = _scaling[0]
            figure_name = "{}_{}_{}.pdf".format(_scaling, _sc_part, time_type)

            if _sc_part != "gen" and time_type == "update_time":
                print "Skipping {}".format(figure_name)
                continue

            # Sets correct figure folder
            if _scaling == "strong":
                _loc = "upper right"
                figure_folder = strong_scaling_figure_folder
            elif _scaling == "weak":
                _loc = "upper left"
                figure_folder = weak_scaling_figure_folder
            else:
                raise ValueError("Scaling type not recognized for"
                                 " folder: {}".format(_scaling))

            if _sc_part == "io":
                _label = r"Input/Output"
            elif _sc_part == "gen":
                _label = r"Configuration generation"
            else:
                _label = _sc_part.capitalize()

            _xlabel = r"$N_p$"
            if time_type == "time":
                _time_type = _sc_part

            if _sc_part == "io":
                _ylabel = r"$t_\mathrm{IO}$[s]"
            else:
                _ylabel = r"$t_\mathrm{%s}$[s]" % _time_type.replace(
                    "_", r"\ ").capitalize()

            # Sets speedup labels (the time ratio is dimensionless)
            if _sc_part == "io":
                _ylabel_speedup = (
                    r"$t_{\mathrm{IO},N_{p=%s}}/t_{\mathrm{IO},N_p}$"
                    % min_procs)
            else:
                _tmp = _time_type.replace("_", r"\ ").capitalize()
                _ylabel_speedup = (
                    r"$t_{\mathrm{%s},N_{p=%s}}/t_{\mathrm{%s},N_p}$"
                    % (_tmp, min_procs, _tmp))

            _tmp_dict = {
                "sc": _sc_part,
                "x": np.asarray(x),
                "y": np.asarray(y),
                "label": _label,
                "xlabel": _xlabel,
                "ylabel": _ylabel,
                "ylabel_speedup": _ylabel_speedup,
                "figure_folder": figure_folder,
                "figure_name": figure_name,
                "loc": _loc,
            }

            if _scaling == "strong":
                strong_scaling_list.append(_tmp_dict)
            else:
                weak_scaling_list.append(_tmp_dict)

            plot_scaling(x,
                         y,
                         _label,
                         _xlabel,
                         _ylabel,
                         figure_folder,
                         figure_name,
                         loc=_loc)

    plot_all_scalings(strong_scaling_list, "strong")
    plot_all_scalings(weak_scaling_list, "weak")
    plot_speedup(strong_scaling_list, "strong")
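
A minimal, self-contained sketch of how the parallelizability fraction
mentioned above could be extracted from such strong-scaling times, by
fitting Amdahl's law to the speedups relative to the min_procs = 8 runs.
This is an illustration, not part of the source; the procs and times arrays
below are hypothetical stand-ins for the "NP" and "time" values gathered in
strong_scaling_list.

import numpy as np
from scipy.optimize import curve_fit

MIN_PROCS = 8.0  # matches min_procs above


def amdahl_speedup(p, f):
    """Amdahl's law speedup relative to a run on MIN_PROCS processors,
    given a parallelizable fraction f."""
    return ((1 - f) + f / MIN_PROCS) / ((1 - f) + f / p)


procs = np.array([8., 16., 32., 64., 128.])  # hypothetical N_p values
times = np.array([95., 52., 30., 19., 14.])  # hypothetical run times [s]
speedup = times[0] / times  # S(N_p) = t(8)/t(N_p)

popt, pcov = curve_fit(amdahl_speedup, procs, speedup, p0=[0.9])
print "Estimated parallelizable fraction: %.3f" % popt[0]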
Example #21
0
    def set_time(self, q0_flow_time, euclidean_percent):
        """
		Function for setting the flow time we will plot in euclidean time.

		Args:
			q0_flow_time: float, flow time t0 at where to select q0.
			euclidean_percent: float, value between 0 and 1 of where to select
				the euclidean time to set q0 at.
		"""

        self.x = self.x_old
        self._set_q0_time_and_index(q0_flow_time)

        # Finds the euclidean time index from the given percentage
        self.euclidean_time = int((self.NT - 1) * euclidean_percent)

        # Restores y from original data
        self.y = copy.deepcopy(self.y_original)
        self.y = self.y[:, self.q0_flow_time_index, :]

        # Sets the number of flows as the number of euclidean time slices,
        # as that is now what we are plotting in.
        assert self.y.shape[1] == self.NT, "the second axis does not match NT."
        self.NFlows = self.NT

        self.V = self.lattice_size / float(self.NT)
        self.const = (self.hbarc**4) / (self.a**4) / self.V
        self.const = 1.0  # Correlator contains no normalization
        self.function_derivative_parameters = \
            [{"const": self.const} for i in xrange(self.NFlows)]

        self.function_derivative = [ptools._C_derivative]

        # Sets file name
        self.observable_name = r"$t_e=%.2f$, $t_{f}=%.2f$" % (
            self.euclidean_time, self.q0_flow_time)

        # Selects the configurations in euclidean time in flow time to multiply
        y_e0 = copy.deepcopy(self.y_original[:, self.q0_flow_time_index,
                                             self.euclidean_time])

        # Multiplying QtQ0
        for iteuclidean in xrange(self.NFlows):
            self.y[:, iteuclidean] *= y_e0

        # Sets a new x-axis
        self.x = np.linspace(0, self.NFlows - 1, self.NFlows)

        # Sets up variables dependent on the number of configurations again
        self.unanalyzed_y = np.zeros(self.NFlows)
        self.unanalyzed_y_std = np.zeros(self.NFlows)
        self.unanalyzed_y_data = np.zeros((self.NFlows, self.N_configurations))

        # Resets bootstrap arrays
        self.bs_y = np.zeros(self.NFlows)
        self.bs_y_std = np.zeros(self.NFlows)

        # Resets jackknifed arrays
        self.jk_y = np.zeros(self.NFlows)
        self.jk_y_std = np.zeros(self.NFlows)

        # Resets autocorrelation arrays
        self.autocorrelations = np.zeros(
            (self.NFlows, self.N_configurations / 2))
        self.autocorrelations_errors = np.zeros(
            (self.NFlows, self.N_configurations / 2))
        self.integrated_autocorrelation_time = np.ones(self.NFlows)
        self.integrated_autocorrelation_time_error = np.zeros(self.NFlows)
        self.autocorrelation_error_correction = np.ones(self.NFlows)

        # Retrieves old path spec
        self.observable_output_folder_path = self.observable_output_folder_path_old
        self.post_analysis_folder = self.post_analysis_folder_old

        # Creates a new folder to store results in:
        # {beta}/{observable_name}/{flow time}
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path,
            "tflow%04.4f" % self.q0_flow_time)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Creates a new folder to store results in:
        # {beta}/{observable_name}/{flow time}/{euclidean time}
        self.observable_output_folder_path = os.path.join(
            self.observable_output_folder_path, "te%04d" % self.euclidean_time)
        check_folder(self.observable_output_folder_path, self.dryrun,
                     self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{flow time}
        # exists.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder_old, "tflow%04.4f" % self.q0_flow_time)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Checks that {post_analysis_folder}/{observable_name}/{flow time}/
        # {euclidean time} exists.
        self.post_analysis_folder = os.path.join(
            self.post_analysis_folder, "te%04d" % self.euclidean_time)
        check_folder(self.post_analysis_folder, self.dryrun, self.verbose)

        # Resets some of the ac, jk and bs variables
        self.bootstrap_performed = False
        self.jackknife_performed = False
        self.autocorrelation_performed = False
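
The loop above that multiplies each euclidean time slice by y_e0 is
equivalent to a numpy broadcast over the configuration axis. A small
self-contained check (an illustration, not from the source):

import numpy as np

rng = np.random.RandomState(0)
y = rng.rand(10, 16)  # toy (N_configurations, NT)-shaped data
y_e0 = rng.rand(10)   # Q at the fixed euclidean time, per configuration

looped = y.copy()
for ite in xrange(looped.shape[1]):
    looped[:, ite] *= y_e0

assert np.allclose(looped, y * y_e0[:, None])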
Example #22
0
def thermalization_analysis():
    """Runs the thermalization analysis."""

    verbose = True
    run_pre_analysis = True
    mark_every = 50
    mc_cutoff = -1  # Used as data[:mc_cutoff]; -1 keeps all but the last point
    batch_folder = check_relative_path("data/thermalization_data")
    base_figure_folder = check_relative_path("figures/")
    base_figure_folder = os.path.join(base_figure_folder,
                                      "thermalization_analysis")
    check_folder(base_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    ############ COLD START #############
    cold_beta60_params = copy.deepcopy(default_params)
    cold_beta60_params["batch_folder"] = batch_folder
    cold_beta60_params["batch_name"] = "B60_THERM_COLD"
    cold_beta60_params["load_binary_file"] = False
    cold_beta60_params["beta"] = 6.0
    cold_beta60_params["topc_y_limits"] = [-2, 2]
    cold_beta60_params["num_bins_per_int"] = 32
    cold_beta60_params["bin_range"] = [-2.5, 2.5]
    cold_beta60_params["hist_flow_times"] = [0, 250, 600]
    cold_beta60_params["NCfgs"] = get_num_observables(
        cold_beta60_params["batch_folder"], cold_beta60_params["batch_name"])
    cold_beta60_params["obs_file"] = "8_6.00"
    cold_beta60_params["N"] = 8
    cold_beta60_params["NT"] = 16
    cold_beta60_params["color"] = "#377eb8"

    ########## HOT RND START ############
    hot_rnd_beta60_params = copy.deepcopy(default_params)
    hot_rnd_beta60_params["batch_folder"] = batch_folder
    hot_rnd_beta60_params["batch_name"] = "B60_THERM_HOT_RND"

    ########## HOT RST START ############
    hot_rst_beta60_params = copy.deepcopy(default_params)
    hot_rst_beta60_params["batch_folder"] = batch_folder
    hot_rst_beta60_params["batch_name"] = "B60_THERM_HOT_RST"

    if run_pre_analysis:
        # Submitting distribution analysis
        cold_data = load_observable(cold_beta60_params)
        hot_rnd_data = load_observable(hot_rnd_beta60_params)
        hot_rst_data = load_observable(hot_rst_beta60_params)

    # # Loads post analysis data
    # cold_data = post_analysis.PostAnalysisDataReader(
    #     [cold_beta60_params],
    #     observables_to_load=cold_beta60_params["observables"],
    #     verbose=verbose)

    # hot_rnd_data = post_analysis.PostAnalysisDataReader(
    #     [hot_rnd_beta60_params],
    #     observables_to_load=hot_rnd_beta60_params["observables"],
    #     verbose=verbose)

    # hot_rst_data = post_analysis.PostAnalysisDataReader(
    #     [hot_rst_beta60_params],
    #     observables_to_load=hot_rst_beta60_params["observables"],
    #     verbose=verbose)

    # TODO: plot thermalizations for the 3 different observables

    plot_types = ["default", "loglog", "logx", "logy"]

    y_labels = [[r"$P$", r"$Q$", r"$E$"],
                [
                    r"$\frac{|P - \langle P \rangle|}{\langle P \rangle}$",
                    r"$\frac{|Q - \langle Q \rangle|}{\langle Q \rangle}$",
                    r"$\frac{|E - \langle E \rangle|}{\langle E \rangle}$"
                ],
                [
                    r"$|P - \langle P \rangle|$", r"$|Q - \langle Q \rangle|$",
                    r"$|E - \langle E \rangle|$"
                ]]
    # y_labels[i_dr] = [r"$\langle P \rangle$", r"$\langle P \rangle$",
    #             r"$\langle P \rangle$"]

    subplot_rows = [1, 3]

    # Limits to be put on plot
    x_limits = [[] for i in range(3)]
    y_limits = [[], [], []]

    data_representations = ["default", "relerr", "abserr"]

    obs_list = cold_data["obs"].keys()

    x_label = r"$t_\mathrm{MC}$"

    for i_dr, dr in enumerate(data_representations):
        for pt in plot_types:
            for i_obs, obs in enumerate(obs_list):
                for plot_rows in subplot_rows:

                    # Sets up figure folder for observable
                    figure_folder = os.path.join(base_figure_folder, obs)
                    check_folder(figure_folder, verbose=verbose)

                    # Sets up plot type folder
                    figure_folder = os.path.join(figure_folder, pt)
                    check_folder(figure_folder, verbose=verbose)

                    if obs == "energy":
                        correction_factor = -1.0 / 64
                        cold_data["obs"][obs] *= correction_factor
                        hot_rnd_data["obs"][obs] *= correction_factor
                        hot_rst_data["obs"][obs] *= correction_factor

                    # Retrieves data and makes modifications
                    _cold_data = modify_data(cold_data["obs"][obs][:mc_cutoff],
                                             dr)
                    _hot_rnd_data = modify_data(
                        hot_rnd_data["obs"][obs][:mc_cutoff], dr)
                    _hot_rst_data = modify_data(
                        hot_rst_data["obs"][obs][:mc_cutoff], dr)

                    # Creates figure name
                    figure_name = "{0:s}_{1:s}_{2:s}_{3:d}plotrows.pdf".format(
                        obs, pt, dr, plot_rows)

                    plot_data_array(
                        [np.arange(_cold_data.shape[0]) for i in range(3)],
                        [_cold_data, _hot_rnd_data, _hot_rst_data],
                        ["Cold start", "Hot start", r"Hot start, $RST$"],
                        x_label,
                        y_labels[i_dr][i_obs],
                        figure_name,
                        figure_folder,
                        plot_type=pt,
                        x_limits=x_limits[i_obs],
                        y_limits=y_limits[i_obs],
                        mark_every=mark_every,
                        subplot_rows=plot_rows)
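
For reference, a sketch of what modify_data plausibly computes for each
data representation, reconstructed from the y_labels defined above; this is
an assumption, not the source implementation:

import numpy as np


def modify_data(data, dr):
    """Hypothetical reconstruction matching the y_labels above."""
    mean = np.mean(data)
    if dr == "relerr":
        return np.abs(data - mean) / mean  # |P - <P>| / <P>
    elif dr == "abserr":
        return np.abs(data - mean)  # |P - <P>|
    return data  # "default": the raw MC history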
Example #23
0
    def _series_plot_core(self,
                          indexes,
                          x_limits=None,
                          y_limits=None,
                          plot_with_formula=False,
                          error_shape="band",
                          fname=None,
                          filename_addendum="",
                          legend_loc="best",
                          legend_size=6,
                          use_common_legend=False,
                          sub_adjust_bottom=0.18,
                          x_label_bottom_pos=(0.51, 0.115),
                          common_legend_anchor=(0.5, -0.01),
                          plot_overlay=None,
                          sub_titles=None):
        """
        Core structure of the series plot, allows to easily be expanded upon
        by the needs of the different observables.

        Args:
                indexes: list containing integers of which intervals to plot
                        together.
                x_limits: limits of the x-axis. Default is False.
                y_limits: limits of the y-axis. Default is False.
                plot_with_formula: bool, default is false, is True will look for
                        formula for the y-value to plot in title.
                error_shape: plot with error bands or with error bars.
                        Options: band, bars
                fname: str, figure name. Default is
                        post_analysis_{obs_name}_{analysis_type}.pdf
                filename_addendum: str, default is ''. Adds extra string at end
                        of filename.
                legend_loc: str, position of legend box. Default is 'best'.
                legend_size: int, fontsize. Default is 6.
                use_common_legend: bool, if True will use commond legend. 
                        Default is False.
                sub_adjust_bottom: float, scales the plot to make more room at
                        bottom. Default is 0.18.
                x_label_bottom_ypos: float, position of common x-label. Default
                        is 0.115.
                common_legend_anchor: tuple of floats, common legend offset 
                        at bottom. Default is (0.5,-0.01).
                plot_overlay: list of dicts, default is None.
                sub_title: list, defualt is None. Will make strings in list
                        as title for each plot.
        """

        old_rc_paramx = plt.rcParams['xtick.labelsize']
        old_rc_paramy = plt.rcParams['ytick.labelsize']
        plt.rcParams['xtick.labelsize'] = 6
        plt.rcParams['ytick.labelsize'] = 6

        # Starts plotting
        fig, axes = plt.subplots(2, 2, sharey=True, sharex=True)

        # Checks that we actually have enough different data points to plot
        def comparer(b, ind):
            return len(self.plot_values[b]) > max(ind)
        asrt_msg = "Need at least %d different values. Currently have %d: %s" \
            % (max(indexes), len(self.plot_values.values()[0]),
               ", ".join(self.plot_values.values()[0].keys()))
        if not np.all([comparer(b, indexes) for b in self.batch_names]):
            print "WARNING:", asrt_msg
            return

        for ax, i in zip(list(itertools.chain(*axes)), indexes):
            for bn in self.sorted_batch_names:
                # Retrieves the values depending on the indexes provided and
                # beta values.
                value = self.plot_values[bn][sorted(
                    self.observable_intervals[bn])[i]]

                # Retrieves values to plot
                x = value["x"]
                y = value["y"]
                y_err = value["y_err"]

                if error_shape == "band":
                    ax.plot(x,
                            y,
                            "-",
                            label=value["label"],
                            color=self.colors[bn])
                    ax.fill_between(x,
                                    y - y_err,
                                    y + y_err,
                                    alpha=0.5,
                                    edgecolor='',
                                    facecolor=self.colors[bn])
                elif error_shape == "bars":
                    ax.errorbar(x,
                                y,
                                yerr=y_err,
                                capsize=5,
                                fmt="_",
                                ls=":",
                                label=value["label"],
                                color=self.colors[bn],
                                ecolor=self.colors[bn])
                else:
                    raise KeyError("%s is not a valid error bar shape." %
                                   error_shape)

            # Sets axes limits if provided
            if not isinstance(x_limits, type(None)):
                ax.set_xlim(x_limits)
            else:
                x_limits = [x[0], x[-1]]
            if not isinstance(y_limits, type(None)):
                ax.set_ylim(y_limits)

            # Plotting plot overlay
            if not isinstance(plot_overlay, type(None)):
                for _po in plot_overlay:
                    x = np.linspace(x_limits[0], x_limits[1],
                                    self.num_overlay_points)
                    y = np.ones(self.num_overlay_points) * _po["mass"]
                    y_err = np.ones(self.num_overlay_points)
                    y_err *= _po["mass_error"]
                    if ("label" in _po):
                        ax.plot(x,
                                y,
                                _po["ls"],
                                label=_po["label"],
                                color=_po["color"])
                    else:
                        ax.plot(x, y, _po["ls"], color=_po["color"])
                    ax.fill_between(x,
                                    y - y_err,
                                    y + y_err,
                                    alpha=0.5,
                                    edgecolor="",
                                    facecolor=_po["color"])

            # Basic plotting commands
            ax.grid(True)
            if not use_common_legend:
                ax.legend(loc=legend_loc, prop={"size": legend_size})

            if not isinstance(sub_titles, type(None)):
                ax.set_title(sub_titles[i], fontsize=8)

        # In case we use common legend box for all plots
        if use_common_legend:
            # https://stackoverflow.com/questions/4700614/how-to-put-the-legend-out-of-the-plot
            # Shrink current axis's height by 10% on the bottom
            handles, labels = axes[0, 0].get_legend_handles_labels()
            fig.legend(handles,
                       labels,
                       loc="lower center",
                       ncol=5,
                       bbox_to_anchor=common_legend_anchor)
            plt.subplots_adjust(bottom=sub_adjust_bottom)
        else:
            x_label_bottom_pos = (0.52, 0.035)

        # Set common labels
        # https://stackoverflow.com/questions/6963035/pyplot-axes-labels-for-subplots
        fig.text(x_label_bottom_pos[0],
                 x_label_bottom_pos[1],
                 self.x_label,
                 ha='center',
                 va='center',
                 fontsize=11)
        fig.text(0.03,
                 0.5,
                 self.y_label,
                 ha='center',
                 va='center',
                 rotation='vertical',
                 fontsize=11)

        # Sets the title string
        # title_string = r"%s" % self.observable_name
        # if plot_with_formula:
        #   title_string += r" %s" % self.formula
        # plt.suptitle(title_string)
        # plt.tight_layout(pad=1.7)

        # Saves and closes figure
        folder_name = "beta%s" % "-".join(
            [str(bn) for bn in self.beta_values.values()])
        folder_name += "_N%s" % "".join([str(i) for i in indexes])
        folder_path = os.path.join(self.output_folder_path, folder_name)
        check_folder(folder_path, False, True)

        if isinstance(fname, types.NoneType):
            fpath = os.path.join(
                folder_path, "post_analysis_%s_%s%s.pdf" %
                (self.observable_name_compact, self.analysis_data_type,
                 filename_addendum))
        else:
            fpath = os.path.join(folder_path, fname)

        plt.savefig(fpath, dpi=self.dpi)
        if self.verbose:
            print "Figure saved in %s" % fpath
        # plt.show()
        plt.close(fig)

        plt.rcParams['xtick.labelsize'] = old_rc_paramx
        plt.rcParams['ytick.labelsize'] = old_rc_paramy
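
A hypothetical call illustrating the plot_overlay argument, with the dict
keys ("mass", "mass_error", "ls", "color" and the optional "label")
reconstructed from the loop above; analysis stands for some post-analysis
object exposing this method, and all values are placeholders:

overlay = [{
    "mass": 0.15,        # central value, drawn as a horizontal line
    "mass_error": 0.02,  # half-width of the shaded error band
    "ls": "--",
    "color": "#808080",
    "label": r"Reference value",
}]
analysis._series_plot_core([0, 1, 2, 3],
                           error_shape="band",
                           use_common_legend=True,
                           plot_overlay=overlay)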
Example #24
0
def scaling_analysis():
    """
    Scaling analysis.
    """

    # TODO: complete load slurm output function
    # TODO: add line fitting procedure to plot_scaling function
    # TODO: double check what I am plotting makes sense (see pdf)

    # Basic parameters
    verbose = True
    run_pre_analysis = True
    json_file = "run_times_updated.json"
    datapath = os.path.join(("/Users/hansmathiasmamenvege/Programming/LQCD/"
                             "data/scaling_output/"), json_file)

    slurm_output_folder = check_relative_path("../data/scaling_output")

    # Extract times from slurm files and put into json file
    if not os.path.isfile(datapath):
        print "No {} found. Loading slurm data.".format(json_file)
        load_slurm_folder(slurm_output_folder)

    # Basic figure setup
    base_figure_folder = check_relative_path("figures")
    base_figure_folder = os.path.join(base_figure_folder, "scaling")
    check_folder(base_figure_folder, verbose=verbose)

    # Strong scaling folder setup
    strong_scaling_figure_folder = os.path.join(base_figure_folder, "strong")
    check_folder(strong_scaling_figure_folder, verbose=verbose)

    # Weak scaling folder setup
    weak_scaling_figure_folder = os.path.join(base_figure_folder, "weak")
    check_folder(weak_scaling_figure_folder, verbose=verbose)

    default_params = get_default_parameters(data_batch_folder="temp",
                                            include_euclidean_time_obs=False)

    # Loads scaling times and splits into weak and strong
    with open(datapath, "r") as f:
        scaling_times = json.load(f)["runs"]
    strong_scaling_times = filter_scalings(scaling_times, "strong_scaling_np")
    weak_scaling_times = filter_scalings(scaling_times, "weak_scaling_np")

    # Splits strong scaling into gen, io, flow
    gen_strong_scaling = filter_scalings(strong_scaling_times, "gen")
    io_strong_scaling = filter_scalings(strong_scaling_times, "io")
    flow_strong_scaling = filter_scalings(strong_scaling_times, "flow")

    # Splits weak scaling into gen, io, flow
    gen_weak_scaling = filter_scalings(weak_scaling_times, "gen")
    io_weak_scaling = filter_scalings(weak_scaling_times, "io")
    flow_weak_scaling = filter_scalings(weak_scaling_times, "flow")

    # Adds number of processors to strong scaling
    gen_strong_scaling = add_numprocs(gen_strong_scaling)
    io_strong_scaling = add_numprocs(io_strong_scaling)
    flow_strong_scaling = add_numprocs(flow_strong_scaling)

    # Adds number of processors to weak scaling
    gen_weak_scaling = add_numprocs(gen_weak_scaling)
    io_weak_scaling = add_numprocs(io_weak_scaling)
    flow_weak_scaling = add_numprocs(flow_weak_scaling)

    scalings = [
        gen_strong_scaling, io_strong_scaling, flow_strong_scaling,
        gen_weak_scaling, io_weak_scaling, flow_weak_scaling
    ]

    for time_type in ["update_time", "time"]:

        # Loops over scaling values in scalings
        for sv in scalings:
            x = [i["NP"] for i in sv]
            y = [i[time_type] for i in sv]

            # Sets up filename and folder name
            _scaling = list(set([i["runname"].split("_")[0] for i in sv]))
            _sc_part = list(set([i["runname"].split("_")[-1] for i in sv]))
            assert len(_scaling) == 1, \
                "incorrect sv type list: {}".format(_scaling)
            assert len(_sc_part) == 1, \
                "incorrect sv part list length: {}".format(_sc_part)
            _sc_part = _sc_part[0]
            _scaling = _scaling[0]
            figure_name = "{}_{}_{}.pdf".format(_scaling, _sc_part, time_type)

            if _sc_part != "gen" and time_type == "update_time":
                print "Skipping {}".format(figure_name)
                continue

            # Sets correct figure folder
            if _scaling == "strong":
                figure_folder = strong_scaling_figure_folder
            elif _scaling == "weak":
                figure_folder = weak_scaling_figure_folder
            else:
                raise ValueError("Scaling type not recognized for"
                                 " folder: {}".format(_scaling))

            plot_scaling(
                x, y, _sc_part.capitalize(), r"$N_p$", r"$t_\mathrm{%s}$" %
                time_type.replace("_", r"\ ").capitalize(), figure_folder,
                figure_name)
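
The helpers filter_scalings and add_numprocs are not shown in these
examples. From their usage above, a plausible sketch; the runname format
"strong_scaling_np512_gen" is an assumption inferred from the filter keys
and the split("_") calls:

import re


def filter_scalings(runs, key):
    """Hypothetical: keeps the runs whose runname contains key."""
    return [r for r in runs if key in r["runname"]]


def add_numprocs(runs):
    """Hypothetical: parses N_p from runnames such as
    'strong_scaling_np512_gen' and attaches it as "NP"."""
    for r in runs:
        r["NP"] = int(re.search(r"np(\d+)", r["runname"]).group(1))
    return runs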
Example #25
0
    def __init__(self,
                 data,
                 with_autocorr=True,
                 figures_folder="../figures",
                 verbose=False,
                 dryrun=False):
        """
        Initializes this specialized form of finding the ratio of different 
        topological charge definitions.
        """
        if with_autocorr:
            self.ac = "with_autocorr"
        else:
            self.ac = "without_autocorr"
        self.with_autocorr = with_autocorr
        observable = self.observable_name_compact

        self.verbose = verbose
        self.dryrun = dryrun

        self.beta_values = sorted(data.beta_values)
        self.colors = data.colors
        self.lattice_sizes = data.lattice_volumes
        self.size_labels = data.labels
        self.reference_values = data.reference_values
        self.print_latex = data.print_latex
        self._setup_analysis_types(data.analysis_types)

        self.observable_intervals = {b: {} for b in self.beta_values}

        # Q^2
        self.topc2 = {atype: {beta: {} for beta in self.beta_values} \
            for atype in self.analysis_types}

        # Q^4
        self.topc4 = {atype: {beta: {} for beta in self.beta_values} \
            for atype in self.analysis_types}

        # Q^4_C
        self.topc4C = {atype: {beta: {} for beta in self.beta_values} \
            for atype in self.analysis_types}

        # R = Q^4_C / Q^2
        self.topcR = {atype: {beta: {} for beta in self.beta_values} \
            for atype in self.analysis_types}

        # Data will be copied from R
        self.data = {atype: {beta: {} for beta in self.beta_values} \
            for atype in self.analysis_types}

        # Q^2 and Q^4 raw bs values
        self.topc2_raw = {}
        self.topc4_raw = {}
        self.topc4c_raw = {}
        self.topcR_raw = {}
        self.data_raw = {}

        for atype in data.raw_analysis:
            if atype == "autocorrelation":
                self.ac_raw = data.raw_analysis[atype]
            else:
                self.data_raw[atype] = data.raw_analysis[atype]

        # First, gets the topc2, then topc4
        for atype in self.analysis_types:
            for beta in self.beta_values:
                self.observable_intervals[beta] = \
                    data.data_observables["topc2MC"][beta].keys()

                for subobs in data.data_observables["topc2MC"][beta]:

                    # Q^2
                    self.topc2[atype][beta][subobs] = \
                        data.data_observables["topc2MC"] \
                        [beta][subobs][self.ac][atype]

                    # Q^4
                    self.topc4[atype][beta][subobs] = \
                        data.data_observables["topc4MC"] \
                        [beta][subobs][self.ac][atype]

                    if self.with_autocorr:
                        self.topc2[atype][beta][subobs]["ac"] = \
                            data.data_observables["topc2MC"][beta] \
                            [subobs]["with_autocorr"]["autocorr"]

                        self.topc4[atype][beta][subobs]["ac"] = \
                            data.data_observables["topc4MC"][beta] \
                            [subobs]["with_autocorr"]["autocorr"]

        # Creates base output folder for post analysis figures
        self.figures_folder = figures_folder
        check_folder(self.figures_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)
        check_folder(os.path.join(self.figures_folder, data.batch_name),
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates output folder
        self.post_analysis_folder = os.path.join(self.figures_folder,
                                                 data.batch_name,
                                                 "post_analysis")
        check_folder(self.post_analysis_folder,
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        # Creates observable output folder
        self.output_folder_path = os.path.join(self.post_analysis_folder,
                                               self.observable_name_compact)
        check_folder(self.output_folder_path,
                     dryrun=self.dryrun,
                     verbose=self.verbose)

        self._setup_article_values()
        self._normalize_article_values()

        self._setup_volumes()
        self._normalize_Q()
        self._calculate_Q4C()
        self._calculate_R()
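
Based on the comments above ("Q^4_C" and "R = Q^4_C / Q^2"), the
_calculate_Q4C and _calculate_R steps presumably compute the connected
fourth moment of the topological charge and its ratio to Q^2. A minimal
numeric sketch of those definitions (an assumption, not the source):

import numpy as np

Q = np.random.randn(1000)  # toy topological charge samples
Q2 = np.mean(Q**2)
Q4 = np.mean(Q**4)
Q4C = Q4 - 3.0 * Q2**2  # connected fourth moment <Q^4> - 3<Q^2>^2
R = Q4C / Q2  # R = Q^4_C / Q^2
print "Q^2 = %g  Q^4_C = %g  R = %g" % (Q2, Q4C, R)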
Example #26
0
def distribution_plotter(data,
                         observable,
                         xlabel,
                         ylabel,
                         mark_interval=10,
                         flow_time=400,
                         verbose=False):
    """
    Plots distributions to analyse how we deepend on the epsilon in data 
    generation.

    Plots autocorr together all in one figure, eps vs final autocorr, 
    eps vs obs at specific flow time
    """

    folder_path = "../figures"
    if not os.path.isdir(folder_path):
        folder_path = "../../figures"

    # Adds distribution runs folder
    folder_path = os.path.join(folder_path, "distribution_runs")
    check_folder(folder_path, verbose=verbose)

    # Adds post analysis folder
    folder_path = os.path.join(folder_path, "post_analysis")
    check_folder(folder_path, verbose=verbose)

    # Adds observable folder
    folder_path = os.path.join(folder_path, observable)
    check_folder(folder_path, verbose=verbose)

    # Retrieves relevant values
    eps_values = sorted(data.keys())
    autocorr = []
    obs_data = []
    data_types = ["unanalyzed", "bootstrap", "jackknife"]

    for eps in eps_values:
        # print data[eps][observable][6.0]["with_autocorr"].keys(), \
        #     data[eps][observable][6.0]["with_autocorr"]["autocorr"].keys(), \
        #     data[eps][observable][6.0]["with_autocorr"]["jackknife"].keys()

        autocorr.append(
            data[eps][observable][6.0]["with_autocorr"]["autocorr"])
        obs_data.append({
            t: data[eps][observable][6.0]["with_autocorr"][t]
            for t in data_types
        })

    # Plots the different observables
    for t in data_types:
        fig = plt.figure()
        ax = fig.add_subplot(111)

        for i, eps in enumerate(eps_values):
            ax.errorbar(obs_data[i][t]["x"],
                        obs_data[i][t]["y"],
                        yerr=obs_data[i][t]["y_error"],
                        label=r"$\epsilon_\mathrm{rnd}=%.2f$" % eps,
                        alpha=0.5,
                        capsize=5,
                        fmt="_",
                        markevery=mark_interval,
                        errorevery=mark_interval)

        ax.legend()
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.grid(True)

        # Checks and creates relevant folder
        figname = os.path.join(folder_path,
                               "{0:s}_{1:s}.pdf".format(t, observable))
        fig.savefig(figname)
        print "Created figure {}".format(figname)

        plt.close(fig)

    # Plots eps vs obs at final flow time
    for t in data_types:
        fig = plt.figure()
        ax = fig.add_subplot(111)

        x = [obs_data[i][t]["x"][flow_time] for i in range(len(eps_values))]
        y = [obs_data[i][t]["y"][flow_time] for i in range(len(eps_values))]
        yerr = [
            obs_data[i][t]["y_error"][flow_time]
            for i in range(len(eps_values))
        ]

        ax.errorbar(eps_values,
                    y,
                    yerr=yerr,
                    label=r"$t_f={0:.2f}$".format(x[0]),
                    alpha=0.5,
                    capsize=5,
                    fmt="_")

        ax.legend()
        ax.set_xlabel(r"$\epsilon_\mathrm{rnd}$")
        ax.set_ylabel(ylabel)
        ax.grid(True)

        # Checks and creates relevant folder
        figname = os.path.join(folder_path,
                               "eps_vs_{0:s}_{1:s}.pdf".format(observable, t))
        fig.savefig(figname)
        print "Created figure {}".format(figname)

        plt.close(fig)

    # Plots eps vs autocorr at final flow time
    fig = plt.figure()
    ax = fig.add_subplot(111)

    y = [autocorr[i]["tau_int"][flow_time] for i in range(len(eps_values))]
    yerr = [
        autocorr[i]["tau_int_err"][flow_time] for i in range(len(eps_values))
    ]

    ax.errorbar(eps_values,
                y,
                yerr=yerr,
                label=r"$t_f={0:.2f}$".format(x[0]),
                alpha=1.0,
                capsize=5,
                fmt="_")

    ax.legend()
    ax.set_xlabel(r"$\epsilon_\mathrm{rnd}$")
    ax.set_ylabel(r"$\tau_\mathrm{int}$")
    ax.grid(True)

    # Checks and creates relevant folder
    figname = os.path.join(folder_path,
                           "eps_vs_tau_int_{0:s}.pdf".format(observable))
    fig.savefig(figname)
    print "Created figure {}".format(figname)

    plt.close(fig)
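
The nesting distribution_plotter expects of its data argument,
reconstructed from the lookups above; the key names come from the code,
while the epsilon value, observable name and beta are placeholders:

data = {
    0.24: {                           # epsilon value
        "plaq": {                     # observable name
            6.0: {                    # beta
                "with_autocorr": {
                    "autocorr":   {"tau_int": [], "tau_int_err": []},
                    "unanalyzed": {"x": [], "y": [], "y_error": []},
                    "bootstrap":  {"x": [], "y": [], "y_error": []},
                    "jackknife":  {"x": [], "y": [], "y_error": []},
                },
            },
        },
    },
}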
Example #27
0
def topc_modes_analysis():
    """Analysis for different lattice sizes and their topological charges."""
    default_params = get_default_parameters(data_batch_folder="temp")

    run_pre_analysis = False
    verbose = True

    data_path = "../data/"
    if not os.path.isdir(data_path):
        data_path = "../" + data_path

    ########## Smaug data 8x16 analysis ##########
    smaug8x16_data_beta60_analysis = copy.deepcopy(default_params)
    smaug8x16_data_beta60_analysis["batch_folder"] = data_path
    smaug8x16_data_beta60_analysis["batch_name"] = "beta60_8x16_run"
    smaug8x16_data_beta60_analysis["beta"] = 6.0
    smaug8x16_data_beta60_analysis["topc_y_limits"] = [-2, 2]
    smaug8x16_data_beta60_analysis["num_bins_per_int"] = 32
    smaug8x16_data_beta60_analysis["bin_range"] = [-2.5, 2.5]
    smaug8x16_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug8x16_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug8x16_data_beta60_analysis["batch_folder"],
        smaug8x16_data_beta60_analysis["batch_name"])
    smaug8x16_data_beta60_analysis["obs_file"] = "8_6.00"
    smaug8x16_data_beta60_analysis["N"] = 8
    smaug8x16_data_beta60_analysis["NT"] = 16
    smaug8x16_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 12x24 analysis ##########
    smaug12x24_data_beta60_analysis = copy.deepcopy(default_params)
    smaug12x24_data_beta60_analysis["batch_folder"] = data_path
    smaug12x24_data_beta60_analysis["batch_name"] = "beta60_12x24_run"
    smaug12x24_data_beta60_analysis["beta"] = 6.0
    smaug12x24_data_beta60_analysis["topc_y_limits"] = [-4, 4]
    smaug12x24_data_beta60_analysis["num_bins_per_int"] = 16
    smaug12x24_data_beta60_analysis["bin_range"] = [-4.5, 4.5]
    smaug12x24_data_beta60_analysis["hist_flow_times"] = [0, 250, 600]
    smaug12x24_data_beta60_analysis["NCfgs"] = get_num_observables(
        smaug12x24_data_beta60_analysis["batch_folder"],
        smaug12x24_data_beta60_analysis["batch_name"])
    smaug12x24_data_beta60_analysis["obs_file"] = "12_6.00"
    smaug12x24_data_beta60_analysis["N"] = 12
    smaug12x24_data_beta60_analysis["NT"] = 24
    smaug12x24_data_beta60_analysis["color"] = "#377eb8"

    ########## Smaug data 16x32 analysis ##########
    smaug16x32_data_beta61_analysis = copy.deepcopy(default_params)
    smaug16x32_data_beta61_analysis["batch_folder"] = data_path
    smaug16x32_data_beta61_analysis["batch_name"] = "beta61_16x32_run"
    smaug16x32_data_beta61_analysis["beta"] = 6.1
    smaug16x32_data_beta61_analysis["topc_y_limits"] = [-8, 8]
    smaug16x32_data_beta61_analysis["num_bins_per_int"] = 16
    smaug16x32_data_beta61_analysis["bin_range"] = [-7.5, 7.5]
    smaug16x32_data_beta61_analysis["hist_flow_times"] = [0, 250, 600]
    smaug16x32_data_beta61_analysis["NCfgs"] = get_num_observables(
        smaug16x32_data_beta61_analysis["batch_folder"],
        smaug16x32_data_beta61_analysis["batch_name"])
    smaug16x32_data_beta61_analysis["obs_file"] = "16_6.10"
    smaug16x32_data_beta61_analysis["N"] = 16
    smaug16x32_data_beta61_analysis["NT"] = 32
    smaug16x32_data_beta61_analysis["color"] = "#377eb8"

    param_list = [
        smaug8x16_data_beta60_analysis, smaug12x24_data_beta60_analysis,
        smaug16x32_data_beta61_analysis
    ]

    if run_pre_analysis:
        # Submitting analysis
        for analysis_parameters in param_list:
            pre_analysis(analysis_parameters)

    # Loads topc data
    data = []
    # N_val = [24, 24, 28]
    for i, param in enumerate(param_list):
        print "Loading data for: {}".format(param["batch_name"])
        # fpath = os.path.join(param["batch_folder"], param["batch_name"],
        #                      "{0:d}_{1:.2f}.npy".format(N_val[i],
        #                                                 param["beta"]))
        data_, p = get_data_parameters(param)
        data.append({
            "data": data_("topc")["obs"].T,
            "beta": param["beta"],
            "N": param["N"]
        })

        # print data_("topc")["obs"].shape

    # Flow time to plots
    flow_times = [0, 250, 600]

    # Histogram plotting
    xlim = 7.5
    NBins = np.arange(-xlim, xlim, 0.05)
    for t_f in flow_times:
        # Adds unanalyzed data
        fig, axes = plt.subplots(3, 1, sharey=False, sharex=True)
        for i, ax in enumerate(axes):
            lab = r"${0:d}^3\times{1:d}$, $\beta={2:.2f}$".format(
                data[i]["N"], data[i]["N"] * 2, data[i]["beta"])

            weights = np.ones_like(data[i]["data"][t_f])
            weights /= len(data[i]["data"][t_f])
            ax.hist(data[i]["data"][t_f],
                    bins=NBins,
                    label=lab,
                    weights=weights)
            ax.legend(loc="upper right")
            ax.grid(True)
            ax.set_xlim(-xlim, xlim)

            if i == 1:
                ax.set_ylabel(r"$Hits$")
            elif i == 2:
                ax.set_xlabel(r"$Q$")

        # Sets up figure
        figpath = "figures/topc_modes_analysis"
        if not os.path.isdir(figpath):
            figpath = "../" + figpath
        check_folder(figpath, verbose=verbose)
        figpath = os.path.join(figpath, "topc_modes_tf{}.pdf".format(t_f))
        fig.savefig(figpath)
        print "Figure saved at {0:s}".format(figpath)
        plt.close(fig)
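
A quick note on the histogram weights used above: dividing by the number of
configurations makes the bar heights sum to one, so each bar is the
fraction of configurations in that bin (unlike a density normalization,
which also divides by the bin width). A self-contained check:

import numpy as np

np.random.seed(0)
samples = np.random.randn(500)
bins = np.arange(-7.5, 7.5, 0.05)
weights = np.ones_like(samples) / len(samples)
counts, _ = np.histogram(samples, bins=bins, weights=weights)
assert abs(counts.sum() - 1.0) < 1e-9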