示例#1
0
def save_n_words(word_dict: dict):
    """Store the key/value pairs of *word_dict* via ``Postgres.save_words``.

    Args:
        word_dict: Mapping whose items are handed to ``save_words`` as a
            list of ``(key, value)`` tuples.
    """
    psql = Postgres()
    try:
        psql.save_words(list(word_dict.items()))
    finally:
        # Always release the Postgres handle, even when the save fails.
        psql.close()
示例#2
0
def count_words() -> dict:
    """Count word occurrences in the rows returned by ``get_posts_text``.

    The first element of every row is passed through ``clear_text`` and
    split on whitespace; each resulting token is counted.

    Returns:
        A dict mapping each word to the number of times it was seen.
    """
    words_dict = {}
    psql = Postgres()
    try:
        for row in psql.get_posts_text():
            for word in clear_text(row[0]).split():
                words_dict[word] = words_dict.get(word, 0) + 1
    finally:
        # Always release the Postgres handle, even if reading or cleaning
        # the text raises.
        psql.close()

    return words_dict
示例#3
0
def save_posts(posts):
    """Store the entries of ``posts['items']`` until a known post is reached.

    For every post, the text of each entry in its optional
    ``'copy_history'`` list is appended to the post's own text (separated
    by single spaces) before the post is saved with ``add_post``.

    Args:
        posts: Mapping with an ``'items'`` sequence; every item must provide
            ``'id'`` and ``'text'`` and may provide ``'copy_history'``.
    """
    psql = Postgres()
    try:
        for post in posts['items']:
            post_id = post['id']
            text = post['text']
            if psql.is_post_in(post_id):
                # Stop at the first post that is already stored; the
                # remaining items are not examined.
                # NOTE(review): presumably items arrive newest-first, so
                # everything after a known post is known too -- confirm.
                break

            if 'copy_history' in post:
                nested_texts = [nested['text'] for nested in post['copy_history']]
                text = ' '.join([text] + nested_texts)

            psql.add_post(text=text, vk_id=post_id)
    finally:
        # Always release the Postgres handle, even when a post is malformed
        # or a database call fails.
        psql.close()
示例#4
0
def do_generate():
    """Render the event documentation markdown files from Postgres metadata.

    The output directory comes from the ``MARKDOWN_FOLDER`` environment
    variable (default ``static/``) and the template path from
    ``TEMPLATE_SRC`` (default ``template.md``). Schemas whose name ends in
    ``_dev`` are rendered to ``events_dev.md``; all others to ``events.md``.

    If the output folder is not a directory or the template does not exist,
    an error is logged and the function returns without writing anything.
    """
    DEV = '_dev'
    PROD = ''
    # An empty filter means "include every schema".
    schemas_filter = []

    pg = Postgres()
    try:
        output_folder = os.environ.get("MARKDOWN_FOLDER", "static/")
        template_name = os.environ.get("TEMPLATE_SRC", "template.md")

        if not os.path.isdir(output_folder):
            logger.error("Path {} is not a directory, cannot continue".format(
                output_folder))
            return

        if not os.path.exists(template_name):
            logger.error(
                "template {} doesn't exist, cannot continue".format(
                    template_name))
            return

        schema_dict = pg.get_schema_dict()
        schemas = {PROD: [], DEV: []}
        started_at = time.time()

        for schema_name in sorted(schema_dict):
            if schemas_filter and schema_name not in schemas_filter:
                continue

            logger.debug("generating schema {}".format(schema_name))
            events = []
            for table_name in sorted(schema_dict[schema_name]):
                logger.debug("building event data for {}".format(table_name))
                table = pg.get_table(table_name, schema_name, validate=False)
                properties = sorted(table.get_properties())
                sample_event, event_text = table.sample(most_recent=True)
                if not event_text:
                    # Fall back to the table name when sampling yields no
                    # event text.
                    event_text = table_name
                logger.debug("finished sampling")
                events.append({
                    "event_text": event_text,
                    "event_name": table_name,
                    "event_anchor":
                    (event_text or '').replace(" ", "-").lower(),
                    "properties": [{"name": prop} for prop in properties],
                    "sample_event": dump(sample_event),
                })

            schema = {
                "schema_name": schema_name,
                "schema_name_anchor": schema_name.lower(),
                "events": events,
            }
            group = DEV if schema_name.endswith(DEV) else PROD
            schemas[group].append(schema)
    finally:
        # Release the Postgres handle on every exit path: the early returns
        # above, an exception while sampling, or normal completion.
        pg.close()

    # Read the template once, before any output file is opened for writing,
    # so a template read failure cannot leave a truncated output file.
    with open(template_name, 'r') as template_file:
        template = template_file.read()

    for suffix, group_schemas in schemas.items():
        out_name = os.path.join(output_folder, "events{}.md".format(suffix))
        logger.info("writing template({}) to {}".format(
            template_name, out_name))
        out = {
            # Seconds elapsed since the schema dictionary was fetched.
            "finished_in": round(time.time() - started_at, 2),
            "schemas": group_schemas,
            # Current US/Eastern time with fractional seconds cut off.
            "generated_at":
            str(
                datetime.datetime.now(pytz.utc).astimezone(
                    pytz.timezone("US/Eastern"))).split(".")[0],
        }
        logger.debug(dump(out))
        md = pystache.render(template, out)
        with open(out_name, 'w') as f:
            f.write(md)
import numpy as np

from postgres import Postgres

# Fetch the x and y columns of the linear_regression table and print the
# query result.
pg = Postgres()

try:
    res = pg.query('SELECT x, y FROM linear_regression;')

    print(res)
finally:
    # Always release the Postgres handle, even when the query fails.
    pg.close()