def plot_decomp(row, Mean, EigVec, fig=None, ax=None, Title=None, interactive=False):
    """Build a reconstruction plot for one row of measurements.

    The packed ``row.Values`` field is unpacked as float16 and promoted to
    float64 before being handed to ``Eigen_decomp`` together with ``Mean`` and
    ``EigVec``. The resulting decomposition is then passed to ``recon_plot``.

    :param row: SparkSQL Row holding the measurements for one station, year
        and measurement type. Must expose ``Values``; ``station``, ``year``
        and ``measurement`` are only read when no ``Title`` is supplied.
    :param Mean: The mean vector of all measurements of the given type.
    :param EigVec: Eigen-vectors for the distribution of measurements.
    :param fig: A matplotlib figure in which to place the plot.
    :param ax: A matplotlib axis in which to place the plot.
    :param Title: Plot title override. When None, a title of the form
        ``"<station> / <year> <measurement>"`` is generated from ``row``.
    :param interactive: Flag indicating whether this is an interactive
        (widget-driven) plot; forwarded unchanged to ``recon_plot``.
    :returns: the plotter returned by the ``recon_plot`` initialization
    :rtype: recon_plot
    """
    # Values are stored packed as float16; widen to float64 for the numerics.
    measurements = np.array(unpackArray(row.Values, np.float16), dtype=np.float64)

    if Title is not None:
        heading = Title
    else:
        # Only touch the descriptive row fields when a default title is needed.
        label_fields = (row['station'], row['year'], row['measurement'])
        heading = '%s / %d %s' % label_fields

    # range(1, 366) supplies the values 1..365 as the first argument
    # (presumably the day-of-year axis -- confirm against Eigen_decomp).
    decomposition = Eigen_decomp(range(1, 366), measurements, Mean, EigVec)

    return recon_plot(
        decomposition,
        year_axis=True,
        fig=fig,
        ax=ax,
        interactive=interactive,
        Title=heading,
    )
def decompose(row):
    """Compute the residuals and coefficients for a single row.

    Designed to run inside a Spark ``map()`` call from ``decompose_dataframe``.
    ``Mean`` and ``EigVec`` are not parameters: they are read as global
    variables of ``decompose`` and must be available on the workers.

    The returned row carries every original field plus:

    * ``total_var`` -- ``float(total_var[1])``
    * ``res_mean``  -- ``float(residuals[1][0])``
    * ``res_<i>`` and ``coeff_<i>`` for ``i = 1 .. residuals[1].shape[0] - 1``,
      where ``coeff_<i>`` is looked up under the key ``'c<i-1>'``.

    :param row: SparkSQL Row that contains the measurements for a particular
        station, year and measurement (must expose ``Values`` and ``asDict``).
    :returns: the input row with additional information from the
        eigen-decomposition.
    :rtype: SparkSQL Row
    """
    # Values are stored packed as float16; widen to float64 for the numerics.
    series = np.array(unpackArray(row.Values, np.float16), dtype=np.float64)

    decomposition = Eigen_decomp(None, series, Mean, EigVec)
    total_var, residuals, coeff = decomposition.compute_var_explained()

    fields = row.asDict()
    fields['total_var'] = float(total_var[1])

    # NOTE(review): index 1 of each result is used throughout; what index 0
    # holds is not visible here -- confirm against compute_var_explained.
    res = residuals[1]
    fields['res_mean'] = float(res[0])

    # Residual i is paired with coefficient key 'c<i-1>' (coefficient keys are
    # zero-based while the residual entries after the mean start at 1).
    for k in range(1, res.shape[0]):
        suffix = str(k)
        fields['res_' + suffix] = float(res[k])
        fields['coeff_' + suffix] = float(coeff[1]['c' + str(k - 1)])

    return Row(**fields)