Example #1
def mdl_1d_cat(x, y):
    """builds univariate model to calculate AUC"""
    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x = sb_cutz(x)

    series = pd.get_dummies(x, dummy_na=True)
    lr = LogisticRegressionCV(scoring='roc_auc')

    lr.fit(series, y)

    try:
        preds = (lr.predict_proba(series)[:, -1])
        #preds = (preds > preds.mean()).astype(int)
    except ValueError:
        Tracer()()

    plot = plot_cat(x, y)

    imgdata = BytesIO()
    plot.savefig(imgdata)
    imgdata.seek(0)

    aucz = roc_auc_score(y, preds)
    cmatrix = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdata.getvalue()))
    plt.close()
    return aucz, cmatrix
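
Example #1 and most of the plotting examples below repeat the same pattern: render a matplotlib figure into an in-memory BytesIO buffer, base64-encode the bytes, and embed them as a data URI. A minimal, self-contained sketch of that pattern, assuming Python 3 (quote comes from urllib.parse there, plain urllib in Python 2); the helper name fig_to_data_uri is hypothetical and not taken from the quoted projects:

import base64
from io import BytesIO
from urllib.parse import quote

import matplotlib
matplotlib.use('Agg')  # render off-screen, no display required
import matplotlib.pyplot as plt


def fig_to_data_uri(fig):
    """Serialize a matplotlib figure to a 'data:image/png;base64,...' string."""
    buf = BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    encoded = base64.b64encode(buf.getvalue())  # bytes -> base64-encoded bytes
    plt.close(fig)                              # release the figure once it is encoded
    return 'data:image/png;base64,' + quote(encoded)


# usage:
# fig, ax = plt.subplots(figsize=(6, 4))
# ax.hist([1, 2, 2, 3, 3, 3], facecolor='#337ab7')
# uri = fig_to_data_uri(fig)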
Example #2
def to_greyscale(profile_picture):
  response = requests.get(profile_picture['source'])
  gs_image = Image.open(BytesIO(response.content)).convert('L')
  buffer_image = BytesIO()
  gs_image.save(buffer_image, 'JPEG', quality=90)
  buffer_image.seek(0)
  return buffer_image
Example #3
def image(filename):
  gs_file_string = redis.get(filename)
  buffer_image = BytesIO()
  gs_image = Image.open(BytesIO(gs_file_string))
  gs_image.save(buffer_image, 'JPEG', quality=90)
  buffer_image.seek(0)
  return Response(buffer_image.getvalue(), mimetype='image/jpeg')
Example #4
    def describe_numeric_1d(series, base_stats):
        stats = {'mean': series.mean(), 'std': series.std(), 'variance': series.var(), 'min': series.min(),
                'max': series.max()}
        stats['range'] = stats['max'] - stats['min']

        for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
            stats[pretty_name(x)] = series.quantile(x)
        stats['iqr'] = stats['75%'] - stats['25%']
        stats['kurtosis'] = series.kurt()
        stats['skewness'] = series.skew()
        stats['sum'] = series.sum()
        stats['mad'] = series.mad()
        stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
        stats['type'] = "NUM"
        stats['n_zeros'] = (len(series) - np.count_nonzero(series))
        stats['p_zeros'] = stats['n_zeros'] / len(series)

        # Large histogram
        imgdata = BytesIO()
        plot = series.plot(kind='hist', figsize=(6, 4),
                           facecolor='#337ab7', bins=bins)  # TODO when running on server, send this off to a different thread
        plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        plot.figure.savefig(imgdata)
        imgdata.seek(0)
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
        #TODO Think about writing this to disk instead of caching them in strings
        plt.close(plot.figure)

        stats['mini_histogram'] = mini_histogram(series)

        return pd.Series(stats, name=series.name)
Example #5
def mini_histogram(series, **kwargs):
    """Plot a small (mini) histogram of the data.

    Parameters
    ----------
    series: Series
        The data to plot.

    Returns
    -------
    str
        The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    plot = _plot_histogram(series, figsize=(2, 0.75), **kwargs)
    plot.axes.get_yaxis().set_visible(False)

    if LooseVersion(matplotlib.__version__) <= '1.5.9':
        plot.set_axis_bgcolor("w")
    else:
        plot.set_facecolor("w")

    xticks = plot.xaxis.get_major_ticks()
    for tick in xticks[1:-1]:
        tick.set_visible(False)
        tick.label.set_visible(False)
    for tick in (xticks[0], xticks[-1]):
        tick.label.set_fontsize(8)
    plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    plt.close(plot.figure)
    return result_string
Example #6
def render_poster(name_image):
  # TODO: Parse for name and base64 Image
  name = name_image.split('-')[0]
  poster_string = redis.get(name)
  buffer_image = BytesIO()
  poster_image = Image.open(BytesIO(poster_string))
  poster_image.save(buffer_image, 'JPEG', quality=90)
  buffer_image.seek(0)
  return Response(buffer_image.getvalue(), mimetype='image/jpeg')
Example #7
def save_poster():
  buffer_image = BytesIO()
  buffer_image.seek(0)
  base64image = request.form['image']
  name = request.form['name']
  base64image = re.sub('data:image/png;base64,','',str(base64image))
  base64image = re.sub('\n','',base64image)
  poster_image = Image.open(BytesIO(base64.b64decode(base64image)))
  poster_image.save(buffer_image, 'JPEG', quality=90)
  buffer_image.seek(0)
  redis.set(name, buffer_image.getvalue())
  return json.dumps({'success':True}), 200, {'ContentType':'application/json'}
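
Together with examples #3 and #6 above, this forms an in-memory round trip through redis: decode the posted data URI, re-encode the image as JPEG via BytesIO, store the raw bytes, and later serve them straight back. A condensed sketch of both halves, assuming Python 3, Flask, and an existing redis client; the helper names store_image and serve_image are hypothetical:

import base64
import re
from io import BytesIO

from PIL import Image
from flask import Response


def store_image(redis_client, name, data_uri):
    """Decode a base64 PNG data URI, convert it to JPEG in memory, and store the bytes."""
    raw = re.sub('^data:image/png;base64,', '', data_uri)
    image = Image.open(BytesIO(base64.b64decode(raw)))
    buffer_image = BytesIO()
    image.convert('RGB').save(buffer_image, 'JPEG', quality=90)  # JPEG has no alpha channel
    redis_client.set(name, buffer_image.getvalue())


def serve_image(redis_client, name):
    """Return the stored JPEG bytes as an HTTP response."""
    return Response(redis_client.get(name), mimetype='image/jpeg')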
Example #8
def mdl_1d(x, y):
    """builds univariate model to calculate AUC"""
    lr = LogisticRegressionCV(scoring='roc_auc')
    lars = LassoLarsIC(criterion='aic')

    if x.nunique() > 10 and com.is_numeric_dtype(x):
        x2 = sb_cutz(x)
        series = pd.get_dummies(x2, dummy_na=True)
    else:
        series = pd.get_dummies(x, dummy_na=True)

    lr.fit(series, y)
    lars.fit(series, y)

    try:
        preds = (lr.predict_proba(series)[:, -1])
        #preds = (preds > preds.mean()).astype(int)
    except ValueError:
        Tracer()()

    # try:
    #    cm = confusion_matrix(y, (preds > y.mean()).astype(int))
    # except ValueError:
    #    Tracer()()

    aucz = roc_auc_score(y, preds)

    ns = num_bin_stats(x, y)

    nplot = plot_num(ns)
    #plot = plot_confusion_matrix(cm, y)

    imgdata = BytesIO()
    nplot.savefig(imgdata)
    imgdata.seek(0)
    nplot = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdata.getvalue()))
    plt.close()

    bplot = plot_bubble(ns)
    imgdatab = BytesIO()
    bplot.savefig(imgdatab)
    imgdatab.seek(0)
    bplot = 'data:image/png;base64,' + \
        quote(base64.b64encode(imgdatab.getvalue()))
    plt.close()

    return aucz, nplot, bplot
Example #9
 def mini_histogram(series):
     # Small histogram
     imgdata = BytesIO()
     plot = series.plot(kind='hist', figsize=(2, 0.75), facecolor='#337ab7', bins=bins)
     plot.axes.get_yaxis().set_visible(False)
     plot.set_axis_bgcolor("w")
     xticks = plot.xaxis.get_major_ticks()
     for tick in xticks[1:-1]:
         tick.set_visible(False)
         tick.label.set_visible(False)
     for tick in (xticks[0], xticks[-1]):
         tick.label.set_fontsize(8)
     plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     plot.figure.savefig(imgdata)
     imgdata.seek(0)
     result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
     plt.close(plot.figure)
     return result_string
Example #10
def histogram(series, **kwargs):
    """Plot an histogram of the data.

    Parameters
    ----------
    series: Series, default None
        The data to plot.

    Returns
    -------
    str, The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    plot = _plot_histogram(series, **kwargs)
    plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)
    return result_string
Example #11
def send_fcgi_response(request, data, response):
    io = BytesIO(data)
    rec = Record()
    rec.read(io)
    resp = rec

    resp.type = FCGI_STDOUT
    resp.contentData = response
    resp.contentLength = len(response)
    out = BytesIO()
    resp.write(out)
    out.seek(0)
    request.sendall(out.read())

    resp.type = FCGI_END_REQUEST
    resp.contentData = ""
    resp.contentLength = 0
    out = BytesIO()
    resp.write(out)
    out.seek(0)
    request.sendall(out.read())
Example #12
def mini_histogram(series, **kwargs):
    """Plot a small (mini) histogram of the data.

    Parameters
    ----------
    series: Series
        The data to plot.

    Returns
    -------
    str
        The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    #plot = _plot_histogram(series, figsize=(2, 0.75), **kwargs)
    plot = _plot_histogram(series, figsize=(4, 2), **kwargs)
    #plot.axes.get_yaxis().set_visible(False)

    if LooseVersion(matplotlib.__version__) <= '1.5.9':
        plot.set_axis_bgcolor("w")
    else:
        plot.set_facecolor("w")

    xticks = plot.xaxis.get_major_ticks()
    #for tick in xticks[1:-1]:
    #    tick.set_visible(False)
    #    tick.label.set_visible(False)
    for tick in (xticks[0], xticks[-1]):
        tick.label.set_fontsize(8)
    every_nth = 2
    for n, label in enumerate(plot.xaxis.get_ticklabels()):
        if n % every_nth == 0:
            label.set_visible(False)
    #plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
    plot.figure.subplots_adjust(left=0.2, right=0.95, top=0.95 , wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    plt.close(plot.figure)
    return result_string
Example #13
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.
    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    Returns
    -------
    str, The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    labels = corrdf.columns
    matrix_image = axes_cor.imshow(corrdf,
                                   vmin=-1,
                                   vmax=1,
                                   interpolation="nearest",
                                   cmap='bwr')
    plt.title(title, size=18)
    plt.colorbar(matrix_image)

    num_labels = len(labels)
    if num_labels < 1:
        return ''

    axes_cor.set_xticks(
        np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / num_labels))
    axes_cor.set_yticks(
        np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / num_labels))
    axes_cor.set_xticklabels(labels, rotation=90)
    axes_cor.set_yticklabels(labels)

    matrix_image.figure.savefig(imgdata, bbox_inches='tight')
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
    plt.close(matrix_image.figure)
    return result_string
Example #14
def histogram(series, **kwargs):
    """Plot an histogram of the data.

    Parameters
    ----------
    series: Series
        The data to plot.

    Returns
    -------
    str
        The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    plot = _plot_histogram(series, **kwargs)
    plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)
    return result_string
Example #15
def complete_histogram(hist_data):
    """Large histogram"""
    img_data = BytesIO()
    plt.figure(figsize=(6, 4))
    plot = plt.subplot()
    plt.bar(hist_data['left_edge'],
            hist_data['count'],
            width=hist_data['width'],
            facecolor='#337ab7')
    plot.set_ylabel('Frequency')
    plot.figure.subplots_adjust(left=0.15,
                                right=0.95,
                                top=0.9,
                                bottom=0.1,
                                wspace=0,
                                hspace=0)
    plot.figure.savefig(img_data)
    img_data.seek(0)
    result_string = BASE + quote(base64.b64encode(img_data.getvalue()))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)
    return result_string
Example #16
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.
    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    Returns
    -------
    str, The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    labels = corrdf.columns
    N = 256
    blues = np.ones((N, 4))
    blues[:, 0] = np.linspace(1, 66/256, N)
    blues[:, 1] = np.linspace(1, 136/256, N)
    blues[:, 2] = np.linspace(1, 181/256, N)
    reds = np.ones((N, 4))
    reds[:, 0] = np.linspace(209/256, 1, N)
    reds[:, 1] = np.linspace(60/256, 1, N)
    reds[:, 2] = np.linspace(75/256, 1, N)
    newcmp = ListedColormap(np.concatenate((reds, blues)))
    matrix_image = axes_cor.imshow(corrdf, vmin=-1, vmax=1, interpolation="nearest", cmap=newcmp)
    plt.title(title, size=18)
    plt.colorbar(matrix_image)
    axes_cor.set_xticks(np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / len(labels)))
    axes_cor.set_yticks(np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / len(labels)))
    axes_cor.set_xticklabels(labels, rotation=90)
    axes_cor.set_yticklabels(labels)

    matrix_image.figure.savefig(imgdata, bbox_inches='tight')
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    plt.close(matrix_image.figure)
    return result_string
Example #17
 def mini_histogram(histogram_data):
     # Small histogram
     imgdata = BytesIO()
     hist_data = histogram_data
     figure = plt.figure(figsize=(2, 0.75))
     plot = plt.subplot()
     plt.bar(hist_data["left_edge"],
             hist_data["count"],
             width=hist_data["width"],
             facecolor='#337ab7')
     plot.axes.get_yaxis().set_visible(False)
     plot.set_facecolor("w")
     xticks = plot.xaxis.get_major_ticks()
     for tick in xticks[1:-1]:
         tick.set_visible(False)
         tick.label.set_visible(False)
     for tick in (xticks[0], xticks[-1]):
         tick.label.set_fontsize(8)
     plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     plot.figure.savefig(imgdata)
     imgdata.seek(0)
     result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
     plt.close(plot.figure)
     return result_string
Example #18
 def mini_histogram(series):
     # Small histogram
     imgdata = BytesIO()
     plot = series.plot(kind='hist', figsize=(2, 0.75), facecolor='#337ab7')
     plot.axes.get_yaxis().set_visible(False)
     plot.set_axis_bgcolor("w")
     xticks = plot.xaxis.get_major_ticks()
     for tick in xticks[1:-1]:
         tick.set_visible(False)
         tick.label.set_visible(False)
     for tick in (xticks[0], xticks[-1]):
         tick.label.set_fontsize(8)
     plot.figure.subplots_adjust(left=0.15,
                                 right=0.85,
                                 top=1,
                                 bottom=0.35,
                                 wspace=0,
                                 hspace=0)
     plot.figure.savefig(imgdata)
     imgdata.seek(0)
     result_string = 'data:image/png;base64,' + quote(
         base64.b64encode(imgdata.getvalue()))
     plt.close(plot.figure)
     return result_string
Example #19
 def mini_histogram(histogram_data):
     # Small histogram
     imgdata = BytesIO()
     hist_data = histogram_data
     figure = plt.figure(figsize=(2, 0.75))
     plot = plt.subplot()
     plt.bar(hist_data["left_edge"],
             hist_data["count"],
             width=hist_data["width"],
             facecolor='#337ab7')
     plot.axes.get_yaxis().set_visible(False)
     plot.set_facecolor("w")
     xticks = plot.xaxis.get_major_ticks()
     for tick in xticks[1:-1]:
         tick.set_visible(False)
         tick.label.set_visible(False)
     for tick in (xticks[0], xticks[-1]):
         tick.label.set_fontsize(8)
     plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     plot.figure.savefig(imgdata)
     imgdata.seek(0)
     result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
     plt.close(plot.figure)
     return result_string
Example #20
def create_front_page(council, postcode, multi_council=1):
    """
	create information page - about service, where to send form.
	"""
    body = []
    line = body.append

    line("Postal Vote Application")
    line("")

    if council:

        address = council.address.split("\n")
        naddress = ["Electoral Registration Officer"]
        for a in address:  # removes duplicate lines from formatting
            a = a.strip()
            if a not in naddress:
                naddress.append(a)

        naddress.append(council.postcode)

        line("From your postcode ({0}) we think you live in:".format(postcode))
        line("")
        line(council.name)
        line("")
        """
		Adds Warning for postcodes that cross multiple areas
		https://democracyclub.org.uk/blog/2017/03/20/4314-times-when-postcodes-arent-good-enough/
		"""
        if multi_council > 1:
            line(
                "BE CAREFUL: Your postcode covers multiple councils. Please check this is correct."
            )
            line("")

        line(
            "If this is right - you need to sign the form on the next page and send it to:"
        )

        line("")
        for a in naddress:
            line(a)
        line("")
        if council.phone:
            line("Phone: {0}".format(council.phone))

    else:
        line(
            "We can't find the council for your postcode - is this postcode correct? {0}"
            .format(postcode))

        line(
            "Visit aboutmyvote.co.uk to find the address for your local Electoral Registration Officer"
        )

    line("")
    line(
        "For the Electoral Commission page on postal voting visit www.aboutmyvote.co.uk"
    )
    line("")
    line("This form was pre-populated at postalvote.inkleby.com")

    packet = BytesIO()
    # create a new PDF with Reportlab

    can = canvas.Canvas(packet)

    x = 700
    for r in body:
        can.drawString(40, x, r)
        x -= 15

    can.save()

    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    return new_pdf.getPage(0)  #return a pdf page
Example #21
    def describe_float_1d(df, column, current_result, nrows):
        if spark_version == "1.6+":
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       variance(col(column)).alias("variance"),
                                                       kurtosis(col(column)).alias("kurtosis"),
                                                       stddev(col(column)).alias("std"),
                                                       skewness(col(column)).alias("skewness"),
                                                       df_sum(col(column)).alias("sum"),
                                                       count(col(column) == 0.0).alias('n_zeros')
                                                       ).toPandas()
        else:
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       df_sum(col(column)).alias("sum"),
                                                       count(col(column) == 0.0).alias('n_zeros')
                                                       ).toPandas()
            stats_df["variance"] = df.select(column).na.drop().agg(variance_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]
            stats_df["std"] = np.sqrt(stats_df["variance"])
            stats_df["skewness"] = df.select(column).na.drop().agg(skewness_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]
            stats_df["kurtosis"] = df.select(column).na.drop().agg(kurtosis_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]

        for x in [0.05, 0.25, 0.5, 0.75, 0.95]:
            stats_df[pretty_name(x)] = (df.select(column)
                                        .na.drop()
                                        .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                    .format(col=column, n=x)).toPandas().iloc[:,0]
                                        )
        stats = stats_df.iloc[0].copy()
        stats.name = column
        stats["range"] = stats["max"] - stats["min"]
        stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
        stats["cv"] = stats["std"] / float(stats["mean"])
        stats["mad"] = (df.select(column)
                        .na.drop()
                        .select(df_abs(col(column)-stats["mean"]).alias("delta"))
                        .agg(df_sum(col("delta"))).toPandas().iloc[0,0] / float(current_result["count"]))
        stats["type"] = "NUM"
        stats['p_zeros'] = stats['n_zeros'] / float(nrows)

        # Large histogram
        imgdata = BytesIO()
        hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
        figure = plt.figure(figsize=(6, 4))
        plot = plt.subplot()
        plt.bar(hist_data["left_edge"],
                hist_data["count"],
                width=hist_data["width"],
                facecolor='#337ab7')
        plot.set_ylabel("Frequency")
        plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        plot.figure.savefig(imgdata)
        imgdata.seek(0)
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
        #TODO Think about writing this to disk instead of caching them in strings
        plt.close(plot.figure)

        stats['mini_histogram'] = mini_histogram(hist_data)

        return stats
Example #22
File: web.py  Project: gabegomes/esigen
def report(uuid,
           template='default',
           css='github',
           missing='N/A',
           reporter=ESIgenReport,
           engine='html'):
    """The location we send them to at the end of the upload."""
    if not uuid:
        return redirect(url_for("index", **URL_KWARGS))
    if engine not in ('html', 'zip', 'json', 'gist', 'md'):
        engine = 'html'
    # POST / GET handling
    custom_template = False
    if request.method == 'POST':
        form = request.form
        template = form['template']
        css = form['css']
        missing = form['missing-value'] if form.get('missing') else ''
        if template == 'custom':
            custom_template = True
            template = form['template-custom']
    else:
        template = request.args.get('template', template)
        css = request.args.get('css', css)
        if request.args.get('missing', missing):
            missing = request.args.get('missing-value', missing)
        if template == 'custom':
            custom_template = True
            template = request.args.get('template-custom', template)
    # Template
    if not custom_template:
        template_basename, template_ext = os.path.splitext(template)
        if template_ext != '.md':
            template = template_basename + '.md'
    # Style
    css_basename, css_ext = os.path.splitext(css)
    if css_ext != '.css':
        css = css_basename + '.css'

    # Get their reports.
    root = os.path.join(UPLOADS, uuid)
    if not os.path.isdir(root):
        return redirect(
            url_for("index", message="Upload error. Try again", **URL_KWARGS))

    reports, molecules = [], []
    html = engine == 'html'
    if html:
        preview = 'web'
    elif HAS_PYMOL and engine == 'zip':
        preview = 'static_server'
    else:
        preview = None
    missing = missing[:10] if missing is not None else None
    for fn in sorted(os.listdir(root)):
        if os.path.splitext(fn)[1] not in ALLOWED_EXTENSIONS:
            continue
        path = os.path.join(root, fn)
        molecule = reporter(path, missing=missing)
        report = molecule.report(template=template,
                                 preview=preview,
                                 process_markdown=html)
        reports.append((molecule, report))
        with open(os.path.join(root, molecule.name + '.md'), 'w') as f:
            f.write(report)
        if molecule.data.has_coordinates:
            with open(os.path.join(root, molecule.name + '.pdb'), 'w') as f:
                f.write(molecule.data.pdb_block)
            with open(os.path.join(root, molecule.name + '.xyz'), 'w') as f:
                f.write(molecule.data.xyz_block)
    if not reports:
        return redirect(
            url_for("index",
                    message="File(s) could not be parsed!",
                    **URL_KWARGS))

    if engine == 'html':
        return render_template('report.html',
                               css=css,
                               uuid=uuid,
                               reports=reports,
                               ngl='{{ viewer3d }}' in report,
                               template=template)
    elif engine == 'zip':
        memfile = BytesIO()
        with ZipFile(memfile, 'w', ZIP_DEFLATED) as zf:
            for base, dirs, files in os.walk(root):
                for filename in files:
                    zf.write(os.path.join(base, filename), arcname=filename)
        memfile.seek(0)
        return send_file(memfile,
                         attachment_filename='{}.zip'.format(uuid),
                         as_attachment=True)
    elif engine == 'json':
        d = {}
        for molecule, report in reports:
            d[molecule.basename] = {
                'report': report,
                'data': molecule.data_as_dict()
            }
        return jsonify(d)
    elif engine == 'gist':
        gist_data = {
            'description': "ESIgen report #{}".format(uuid),
            'public': True,
            'files': {
                'ESIgen.md': {
                    'content':
                    "Created with [ESIgen](https://github.com/insilichem/esigen)"
                }
            }
        }
        for molecule, report in reports:
            gist_data['files'][molecule.name + '.md'] = {'content': report}
            if molecule.data.has_coordinates:
                gist_data['files'][molecule.name + '.pdb'] = {
                    'content': molecule.data.pdb_block
                }
                gist_data['files'][molecule.name + '.xyz'] = {
                    'content': molecule.data.xyz_block
                }
        response = requests.post('https://api.github.com/gists',
                                 json=gist_data)
        response.raise_for_status()
        return redirect(response.json()['html_url'])
    elif engine == 'md':
        return Response('\n'.join([r for (m, r) in reports]),
                        content_type='text/plain')
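
The 'zip' branch above shows another common BytesIO use: build a ZIP archive entirely in memory and stream it from Flask without writing a temporary file. A minimal sketch of just that branch; the route and directory layout are placeholders, and attachment_filename is the Flask 1.x keyword used in the example (Flask 2.0 renamed it to download_name):

import os
from io import BytesIO
from zipfile import ZipFile, ZIP_DEFLATED

from flask import Flask, send_file

app = Flask(__name__)


@app.route('/archive/<folder>')
def archive(folder):
    root = os.path.join('uploads', folder)          # placeholder directory layout
    memfile = BytesIO()
    with ZipFile(memfile, 'w', ZIP_DEFLATED) as zf:
        for filename in os.listdir(root):
            zf.write(os.path.join(root, filename), arcname=filename)
    memfile.seek(0)                                 # rewind before streaming
    return send_file(memfile,
                     mimetype='application/zip',
                     attachment_filename='{}.zip'.format(folder),
                     as_attachment=True)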
Example #23
    def describe_float_1d(df, column, current_result, nrows):
        if spark_version == "1.6+":
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       variance(col(column)).alias("variance"),
                                                       kurtosis(col(column)).alias("kurtosis"),
                                                       stddev(col(column)).alias("std"),
                                                       skewness(col(column)).alias("skewness"),
                                                       df_sum(col(column)).alias("sum")
                                                       ).toPandas()
        else:
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       df_sum(col(column)).alias("sum")
                                                       ).toPandas()
            stats_df["variance"] = df.select(column).na.drop().agg(variance_custom(col(column),
                                                                                   stats_df["mean"].ix[0],
                                                                                   current_result["count"])).toPandas().ix[0][0]
            stats_df["std"] = np.sqrt(stats_df["variance"])
            stats_df["skewness"] = df.select(column).na.drop().agg(skewness_custom(col(column),
                                                                                   stats_df["mean"].ix[0],
                                                                                   current_result["count"])).toPandas().ix[0][0]
            stats_df["kurtosis"] = df.select(column).na.drop().agg(kurtosis_custom(col(column),
                                                                                   stats_df["mean"].ix[0],
                                                                                   current_result["count"])).toPandas().ix[0][0]

        for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
            stats_df[pretty_name(x)] = (df.select(column)
                                        .na.drop()
                                        .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                    .format(col=column, n=x)).toPandas().ix[:,0]
                                        )
        stats = stats_df.ix[0].copy()
        stats.name = column
        stats["range"] = stats["max"] - stats["min"]
        stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
        stats["cv"] = stats["std"] / float(stats["mean"])
        stats["mad"] = (df.select(column)
                        .na.drop()
                        .select(df_abs(col(column)-stats["mean"]).alias("delta"))
                        .agg(df_sum(col("delta"))).toPandas().ix[0,0] / float(current_result["count"]))
        stats["type"] = "NUM"
        stats['n_zeros'] = df.select(column).where(col(column)==0.0).count()
        stats['p_zeros'] = stats['n_zeros'] / float(nrows)

        # Large histogram
        imgdata = BytesIO()
        hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
        figure = plt.figure(figsize=(6, 4))
        plot = plt.subplot()
        plt.bar(hist_data["left_edge"],
                hist_data["count"],
                width=hist_data["width"],
                facecolor='#337ab7')
        plot.set_ylabel("Frequency")
        plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        plot.figure.savefig(imgdata)
        imgdata.seek(0)
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
        #TODO Think about writing this to disk instead of caching them in strings
        plt.close(plot.figure)

        stats['mini_histogram'] = mini_histogram(hist_data)

        return stats
Example #24
def create_pdf(form=None, sig_image=None):
    """
	given form results - 
	"""
    def get_from_form(value):
        if form:
            return form.cleaned_data.get(value, "")
        else:
            return ""

    email = get_from_form("email")
    phone_number = get_from_form("phone")
    postcode = get_from_form("postcode")
    surname = get_from_form("surname")
    first_names = get_from_form("first_names")
    add_1 = get_from_form("add_1")
    add_2 = get_from_form("add_2")
    city = get_from_form("city")
    county = get_from_form("county")
    alt_add_1 = get_from_form("alt_add_1")
    alt_add_2 = get_from_form("alt_add_2")
    alt_postcode = get_from_form("alt_postcode")
    alt_reason = get_from_form("reason")

    file_name = file_name_safe("{0}_{1}".format(surname, first_names).lower())

    council = get_from_form("council")
    multi_council = get_from_form("multi_council")

    if city and county:
        add_3 = city + ", " + county
    elif city:
        add_3 = city
    elif county:
        add_3 = county

    until_further_notice = get_from_form("universal")
    one_date = get_from_form("single_day")
    date_range = get_from_form("time_range")
    date_of_birth = get_from_form("dob")

    packet = BytesIO()
    # create a new PDF with Reportlab

    can = canvas.Canvas(packet, pagesize=letter)

    # add signature of present
    if sig_image:
        can.drawImage(ImageReader(sig_image), 293, 155, mask='auto')
    # core address info

    can.drawString(40, 667, surname.upper())
    can.drawString(40, 620, first_names.upper())

    can.drawString(40, 561, add_1.upper())
    can.drawString(40, 541, add_2.upper())
    can.drawString(40, 521, add_3.upper())

    can.drawString(40, 390, email.upper())
    can.drawString(40, 451, phone_number)
    can.drawString(100, 499, postcode.upper())

    # alt address

    can.drawString(285, 646, alt_add_1.upper())
    can.drawString(285, 626, alt_add_2.upper())
    can.drawString(350, 606, alt_postcode.upper())
    can.drawString(285, 548, alt_reason.upper())

    # for how long we want this on

    if until_further_notice:
        can.drawString(30, 278, "X")
    if one_date:
        can.drawString(30, 248, "X")
        write_date(can.drawString, 153, 213, one_date)
    if date_range:
        can.drawString(30, 181, "X")
        write_date(can.drawString, 153, 156, date_range[0])
        write_date(can.drawString, 153, 129, date_range[1])

    # today's date

    write_date(can.drawString, 457, 44, datetime.datetime.now())

    # birthdate

    can.setFont("Helvetica", 30)
    write_date(can.drawString,
               310,
               350,
               date_of_birth,
               25,
               extra_spacing=[1, 3])

    can.save()

    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    front_page = create_front_page(council, postcode, multi_council)

    source_file = os.path.join(settings.PROJECT_PATH, "resources", "form.pdf")
    existing_pdf = PdfFileReader(open(source_file, "rb"))

    output = PdfFileWriter()

    # add the several pages objects to one pdf

    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(front_page)
    output.addPage(page)

    # send the stream into a response and return it to the view

    outputStream = BytesIO()
    output.write(outputStream)

    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename="postal_vote_{0}.pdf"'.format(
            file_name)
    response.write(outputStream.getvalue())

    outputStream.close()
    if council:
        council.increment_count()
    return response
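
The PDF work in examples #20 and #24 relies on the same in-memory handoff: ReportLab draws onto a canvas backed by a BytesIO packet, and PyPDF2 reads that packet back as a one-page PDF to merge onto an existing form. A stripped-down sketch using the same (legacy) PyPDF2 API; the function name and paths are placeholders:

from io import BytesIO

from PyPDF2 import PdfFileReader, PdfFileWriter
from reportlab.pdfgen import canvas


def stamp_text(existing_pdf_path, text, output_stream):
    """Draw text onto a blank overlay and merge it onto the first page of an existing PDF."""
    packet = BytesIO()
    can = canvas.Canvas(packet)
    can.drawString(40, 700, text)           # overlay content
    can.save()
    packet.seek(0)                          # rewind so PyPDF2 can read the packet

    overlay = PdfFileReader(packet)
    with open(existing_pdf_path, 'rb') as fh:
        existing = PdfFileReader(fh)
        page = existing.getPage(0)
        page.mergePage(overlay.getPage(0))  # stamp the overlay onto the form

        output = PdfFileWriter()
        output.addPage(page)
        output.write(output_stream)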
Example #25
    def _download_single_zone_file(self, url):
        response = self._get(url)
        status_code = response.status_code

        if status_code == 200:
            zone_name = url.rsplit('/', 1)[-1].rsplit('.')[-2]
            compressed_file = BytesIO(response.content)

            _, option = cgi.parse_header(
                response.headers['content-disposition'])
            filename = option['filename']

            if not filename:
                filename = zone_name + '.txt.gz'

            path_filename = "{}{}".format(self.save_path, filename)

            decompressed_file = gzip.GzipFile(fileobj=compressed_file,
                                              mode='rb')
            text_list = []
            for line in decompressed_file.readlines():
                domain = line.decode('utf-8').split('\t')[0].rstrip('.')
                text_list.append(domain)
            text_string_list = '\n'.join(list(set(text_list)))
            text_string_bytes_object = BytesIO()
            text_string_bytes_object.write(text_string_list.encode('utf-8'))
            text_string_bytes_object.seek(0)
            text_string_buf = text_string_bytes_object.read()

            gzip_object = gzip.compress(text_string_buf)
            gzip_size = gzip_object.__sizeof__()

            MAX_FILE_SIZE = 1024 * 1024 * 35
            if gzip_size >= MAX_FILE_SIZE:
                chapters = 0
                source_buf = gzip_object

                n = MAX_FILE_SIZE
                final = [
                    source_buf[i * n:(i + 1) * n]
                    for i in range((len(source_buf) + n - 1) // n)
                ]  # list comprehension chunker

                for chunk in final:
                    chapters += 1
                    chapter_string = "{}".format(chapters)
                    chapter_string = chapter_string.zfill(2)
                    chapter_filename = "{}_{}{}".format(
                        zone_name, chapter_string, '.txt.gz')
                    chapter_path_filename = "{}{}".format(
                        self.save_path, chapter_filename)
                    with open(chapter_path_filename, 'wb+') as f:
                        f.write(chunk)
            else:
                with open(path_filename, 'wb+') as f:
                    f.write(gzip_object)

        elif status_code == 401:
            self.token = self.authenticate()
        elif status_code == 404:
            pass
        else:
            pass
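
The core of the example above is an in-memory gzip round trip: wrap the downloaded payload in BytesIO, decompress it with GzipFile without touching disk, transform the text, and recompress it with gzip.compress. A condensed sketch, assuming Python 3; recompress_unique_domains is a hypothetical name:

import gzip
from io import BytesIO


def recompress_unique_domains(gzipped_bytes):
    """Decompress a gzip payload held in memory, keep one domain per line, and re-gzip it."""
    with gzip.GzipFile(fileobj=BytesIO(gzipped_bytes), mode='rb') as f:
        lines = f.read().decode('utf-8').splitlines()
    domains = {line.split('\t')[0].rstrip('.') for line in lines if line}
    text = '\n'.join(sorted(domains))
    return gzip.compress(text.encode('utf-8'))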