Example #1
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # op.add_column('posts', sa.Column('body', sa.Unicode(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(
            Post.data.has_key("body")).yield_per(512):
        new_data = post.data.copy()
        update = {"id": post.id, "data": new_data}

        update["body"] = new_data.pop("body", post.body)

        updates.append(update)

        # Commit every 1024 posts.
        added += 1
        if len(updates) == 1024:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
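
All of the examples on this page lean on the same helpers: a sessionmaker sm and the Post/Blog models from archives.lib.model (see the imports in Examples #12 and #13). The real module is not shown anywhere here, so the following is only a minimal sketch; the DSN and column sets are assumptions inferred from how the snippets use them.

# Hypothetical reconstruction of archives.lib.model -- not the project's
# actual code. The engine URL and columns are assumptions.
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

engine = sa.create_engine("postgresql:///archives")  # assumed DSN
Base = declarative_base()


class Post(Base):
    __tablename__ = "posts"
    id = sa.Column(sa.Integer, primary_key=True)
    url = sa.Column(sa.String)
    body = sa.Column(sa.Unicode)
    data = sa.Column(JSONB)  # the .has_key() filters require JSONB/HSTORE


class Blog(Base):
    __tablename__ = "blogs"
    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String)
    total_likes = sa.Column(sa.Integer, server_default="0", nullable=False)
    updated = sa.Column(sa.DateTime)
    data = sa.Column(JSONB)


# Each call returns a fresh Session, matching how the examples use sm().
sm = sessionmaker(bind=engine)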
Example #2
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column(
        'blogs',
        sa.Column('total_likes',
                  sa.Integer(),
                  server_default='0',
                  nullable=False))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for blog in db.query(Blog).filter(
            sa.or_(Blog.data.has_key("likes"),
                   Blog.data.has_key("share_likes"))).yield_per(512):
        try:
            new_data = blog.data.copy()
            new_data.pop("share_likes", None)

            total_likes = max(new_data.pop("likes", 0), blog.total_likes)

            updates.append({
                "id": blog.id,
                "total_likes": total_likes,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 blogs.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} blogs processed.", flush=True)
            commit_db.bulk_update_mappings(Blog, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Blog, updates)
        commit_db.commit()
Example #3
    def __init__(self):
        self.tumblr = create_tumblr()
        self.db = sm()
        self.grabbed = collections.defaultdict(lambda: 0)
        self.bad = collections.defaultdict(lambda: 0)

        self.queue = []
        self.running = True
Example #4
def upgrade():
    db = sm()
    for post in db.query(Post):
        blog = get_blog(db, post.url)
        post.author = blog.id
        sys.stdout.write(".")
    sys.stdout.write("\nCommiting.\n")
    db.commit()
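
get_blog is not defined in this fragment, and the loop calls it once per post. A hypothetical cached variant (the caching is my own addition, the lookup by Blog.name follows Example #12) would cut that down to one query per distinct URL:

# Hypothetical helper -- the real get_blog is not shown.
_blog_cache = {}


def get_blog(db, url):
    if url not in _blog_cache:
        _blog_cache[url] = db.query(Blog).filter(Blog.name == url).one()
    return _blog_cache[url]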
Example #5
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('blogs', sa.Column('updated', sa.DateTime(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for blog in db.query(Blog).filter(
            Blog.data.has_key("updated")).yield_per(512):
        try:
            new_data = blog.data.copy()
            updated_epoch = new_data.pop("updated", 0)
            updated_date = max(
                datetime.datetime.fromtimestamp(updated_epoch),
                getattr(blog, "updated", None) or datetime.datetime.fromtimestamp(updated_epoch)
            )

            updates.append({
                "id": blog.id,
                "updated": updated_date,
                "data": new_data
            })
        except KeyError:
            continue
        
        # Commit every 512 blogs.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} blogs processed.", flush=True)
            commit_db.bulk_update_mappings(Blog, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Blog, updates)
        commit_db.commit()
Example #6
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('posts',
                  sa.Column('tumblr_id', sa.BigInteger(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(
            Post.data.has_key("id")).yield_per(512):
        try:
            new_data = post.data.copy()
            tumblr_id = new_data.pop("id")

            updates.append({
                "id": post.id,
                "tumblr_id": tumblr_id,
                "data": new_data
            })
        except KeyError:
            continue

        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
Example #7
def worker():
    sql = sm()

    while running and len(urls) > 0:
        url = urls.pop(random.randrange(len(urls)))

        try:
            process_url(sql, url)
        except Exception:
            if sentry_sdk:
                sentry_sdk.capture_exception()
            traceback.print_exc()
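
worker() reads the module-level names urls, running and process_url, none of which appear in the fragment. A hedged sketch of how such workers might be driven with threads, with everything besides worker assumed:

# Hypothetical driver for worker() above; all names here are assumptions.
import threading

running = True
urls = ["https://example.tumblr.com/"]  # placeholder work list

threads = [threading.Thread(target=worker) for _ in range(4)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()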
Example #8
def connect_sql():
    if request.endpoint == "static":
        return

    g.sql = sm()
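
connect_sql looks like a Flask before_request hook: it skips static requests and stashes a session on g. The registration and the matching teardown are not shown, so this pairing is an assumption:

# Assumed wiring for connect_sql above; the app object and the teardown are
# not part of the fragment.
from flask import Flask, g

app = Flask(__name__)
app.before_request(connect_sql)


@app.teardown_request
def close_sql(exc):
    sql = g.pop("sql", None)
    if sql is not None:
        sql.close()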
Example #9
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'posts_meta',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('can_like', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('can_reblog', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('can_reply', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('display_avatar', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('is_blocks_post_format', sa.Boolean(), server_default='f', nullable=True),
        sa.Column('can_send_in_message', sa.Boolean(), server_default='t', nullable=True),
        sa.Column('post_url', sa.String(), nullable=True),
        sa.Column('short_url', sa.String(), nullable=True),
        sa.ForeignKeyConstraint(['id'], ['posts.id']),
        sa.PrimaryKeyConstraint('id'))
    # op.add_column('posts', sa.Column('slug', sa.Unicode(), nullable=True))
    # op.add_column('posts', sa.Column('state', sa.String(), nullable=True))
    # op.add_column('posts', sa.Column('summary', sa.Unicode(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(
            sa.or_(Post.data.has_key("slug"), Post.data.has_key("state"),
                   Post.data.has_key("summary"),
                   Post.data.has_key("liked"))).yield_per(512):
        new_data = post.data.copy()

        update = {"id": post.id, "data": new_data}

        update["slug"] = new_data.pop("slug", post.slug)
        update["state"] = new_data.pop("state", post.state)
        update["summary"] = new_data.pop("summary", post.summary)
        PostMeta.create_from_metadata(commit_db, new_data, post.id)

        updates.append(update)

        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
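
PostMeta.create_from_metadata is called above but never defined in this excerpt. A hypothetical implementation consistent with the posts_meta table the migration creates (Base is the declarative base assumed in the sketch under Example #1):

# Hypothetical sketch -- the real PostMeta is not shown. Columns follow the
# posts_meta table created by this migration.
import sqlalchemy as sa


class PostMeta(Base):
    __tablename__ = "posts_meta"
    id = sa.Column(sa.Integer, sa.ForeignKey("posts.id"), primary_key=True)
    can_like = sa.Column(sa.Boolean, server_default="t")
    can_reblog = sa.Column(sa.Boolean, server_default="t")
    can_reply = sa.Column(sa.Boolean, server_default="t")
    display_avatar = sa.Column(sa.Boolean, server_default="t")
    is_blocks_post_format = sa.Column(sa.Boolean, server_default="f")
    can_send_in_message = sa.Column(sa.Boolean, server_default="t")
    post_url = sa.Column(sa.String)
    short_url = sa.Column(sa.String)

    _META_FIELDS = ("can_like", "can_reblog", "can_reply", "display_avatar",
                    "is_blocks_post_format", "can_send_in_message",
                    "post_url", "short_url")

    @classmethod
    def create_from_metadata(cls, db, data, post_id):
        # Pop the metadata keys out of the JSON blob so the trimmed dict can
        # be written back to posts.data without duplicating them.
        fields = {name: data.pop(name) for name in cls._META_FIELDS
                  if name in data}
        meta = cls(id=post_id, **fields)
        db.add(meta)
        return meta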
Example #10
    def db(self):
        return sm()
Example #11
def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    # op.add_column('posts', sa.Column('format', sa.String(), nullable=True))
    # op.add_column('posts', sa.Column('note_count', sa.Integer(), nullable=True))
    # op.add_column('posts', sa.Column('posted', sa.DateTime(), nullable=True))
    # op.add_column('posts', sa.Column('reblog_key', sa.String(), nullable=True))
    # ### end Alembic commands ###

    # Force commit the operation
    conn = op.get_bind()
    conn.execute("COMMIT")

    updates = []
    added = 0
    db = sm()
    commit_db = sm()

    for post in db.query(Post).filter(sa.or_(
        Post.data.has_key("format"),
        Post.data.has_key("note_count"),
        Post.data.has_key("timestamp"),
        Post.data.has_key("reblog_key"),
    )).yield_per(512):
        try:
            new_data = post.data.copy()

            # Post time
            new_data.pop("date", None)
            post_epoch = new_data.pop("timestamp", 0)
            post_time = max(
                datetime.datetime.fromtimestamp(post_epoch),
                post.posted or datetime.datetime.fromtimestamp(post_epoch)
            )

            # Other fields
            post_format = new_data.pop("format", post.format)
            post_note_count = max(
                post.note_count or 0,
                new_data.pop("note_count", 0)
            )
            post_reblog_key = new_data.pop("reblog_key", post.reblog_key)

            updates.append({
                "id": post.id,
                "posted": post_time,
                "note_count": post_note_count,
                "reblog_key": post_reblog_key,
                "data": new_data
            })
        except KeyError:
            continue
        
        # Commit every 512 posts.
        added += 1
        if len(updates) == 512:
            commit_db.execute("SET synchronous_commit TO off")
            print(f"{added} posts processed.", flush=True)
            commit_db.bulk_update_mappings(Post, updates)
            commit_db.commit()
            updates.clear()

    if updates:
        commit_db.bulk_update_mappings(Post, updates)
        commit_db.commit()
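
The same batch-and-flush dance appears in every migration on this page: disable synchronous_commit, report progress, bulk-update, commit, clear. A hedged consolidation into a single helper, with the name and signature my own:

# Hypothetical refactoring; not part of the original migrations.
def flush_updates(commit_db, model, updates, added, noun="rows"):
    # Turning synchronous_commit off trades durability of the last batch for
    # write speed, which is usually acceptable for a re-runnable backfill.
    commit_db.execute("SET synchronous_commit TO off")
    print(f"{added} {noun} processed.", flush=True)
    commit_db.bulk_update_mappings(model, updates)
    commit_db.commit()
    updates.clear()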
Example #12
import sys
import time
import json

from urllib.parse import urlparse

from sqlalchemy.dialects.postgresql import insert

from archives.lib.connections import create_tumblr
from archives.lib.model import Post, Blog, sm

sql = sm()
tungle = create_tumblr()

META_POP = ["status", "msg", ""]

for post in sql.query(Post).distinct(Post.url):
    # Terrible, but there aren't many blogs in the DB, so it can't be that bad.
    if sql.query(Blog).filter(Blog.name == post.url).scalar():
        continue

    time.sleep(0.25)
    info = tungle.blog_info(post.url)

    # Ignore 404s
    if "meta" in info:
        if info["meta"]["status"] == 404:
            print(f"{post.url} 404")
            continue

    # wot how
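
The snippet cuts off after the 404 check, but the otherwise-unused postgresql insert import hints that the missing tail upserted the fetched blog info into blogs. A purely hypothetical continuation of the loop body, with the conflict target assumed:

# Hypothetical continuation -- not the original code.
blog_data = {k: v for k, v in info.get("blog", {}).items()
             if k not in META_POP}
stmt = insert(Blog.__table__).values(
    name=blog_data.pop("name", post.url),
    data=blog_data,
).on_conflict_do_nothing(index_elements=["name"])  # assumes a unique index
sql.execute(stmt)
sql.commit()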
Example #13
import code
from redis import StrictRedis
from archives.lib.model import sm, Blog, Post
from archives.lib.connections import create_tumblr, redis_pool
from archives.tasks.tumblr import add_post, archive_post, archive_blog

db = sm()
redis = StrictRedis(connection_pool=redis_pool)
tumblr = create_tumblr()

if __name__ == "__main__":
    code.interact(local=dict(globals(), **locals()))