Example 1
def move_user():
    # Copy each user document from MongoDB into MySQL, oldest first.
    for each_doc in mongo_user.find().sort('_id', direction=ASCENDING):
        item = dict()
        item['gen_time'] = each_doc.pop('_id').generation_time
        item['name'] = each_doc['name']
        item['credit'] = each_doc.get('credit', 0)
        item['password'] = each_doc.get('password', 0)
        item['exp'] = each_doc.get('exp', 0)
        item['role'] = each_doc.get('role', 0)
        if len(item['name']) > 45:
            # Name is too long for the target column; log it and skip the user.
            print(item['name'])
            continue
        cursor.execute(INSERT_USER_SQL, item)
        # A single query parameter must be passed as a one-element tuple.
        cursor.execute(GET_USER_ID_SQL, (each_doc['name'],))

        user_id = cursor.fetchone()['user_id']
        cursor.execute(DELETE_USER_FOCUS_VIDEO_SQL, (user_id,))
        cursor.execute(DELETE_USER_FOCUS_AUTHOR_SQL, (user_id,))
        if 'favoriteAid' in each_doc:
            for each_aid in each_doc['favoriteAid']:
                # Skip missing IDs and values that overflow UNSIGNED INT.
                if each_aid is None or each_aid > 4294967295:
                    continue
                item = {'user_id': int(user_id), 'video_id': int(each_aid)}
                cursor.execute(INSERT_USER_FOCUS_VIDEO_SQL, item)
        if 'favoriteMid' in each_doc:
            for each_mid in each_doc['favoriteMid']:
                if each_mid is None or each_mid > 4294967295:
                    continue
                item = {'user_id': int(user_id), 'author_id': int(each_mid)}
                cursor.execute(INSERT_USER_FOCUS_AUTHOR_SQL, item)
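
A note on the parameter style used above: with DB-API drivers such as PyMySQL or MySQLdb (which the %s placeholders in these examples suggest), query parameters are passed as a sequence or mapping, so a single value needs a one-element tuple. A minimal sketch; the table and column names are only illustrative:

def lookup_user_id(cursor, name):
    # The trailing comma matters: (name,) is a one-element tuple, while (name)
    # is just the bare string. Assumes a dict-style cursor, as in the example
    # above.
    cursor.execute("SELECT user_id FROM user WHERE name = %s", (name,))
    row = cursor.fetchone()
    return row['user_id'] if row else None
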
Example 2
def get_recommendation():
    if request.method == "POST":
        RATINGS_PATH = '/srv/movielens/ratings_matrix.npz'
        RATINGS_MATRIX = ratings_matrix.ratings_matrix(RATINGS_PATH)
        user_id = request.json["user_id"]
        sql_query = "SELECT DISTINCT movie_id FROM user_favorite WHERE user_id = %s"
        cursor.execute(sql_query, (user_id,))
        movie_ids = cursor.fetchall()
        movie_list = []
        if len(movie_ids) == 0:
            result = {"recommended_movies": movie_list}
            return jsonify(result)

        liked = {}
        for movie_id in movie_ids:
            movie_id = movie_id[0]
            liked[movie_id] = 50
        print(liked)
        recommendations = get_matches(RATINGS_MATRIX, liked,
                                      NUM_RECOMMENDATIONS)
        print(recommendations)
        sql_query = "SELECT * FROM movie WHERE id = %s"
        for index in range(NUM_RECOMMENDATIONS):
            movie_id = recommendations[index][1]
            movie_id = RATINGS_MATRIX.imdb_id(movie_id)
            cursor.execute(sql_query, (int(movie_id),))
            movie_row = cursor.fetchone()
            movie = get_movie_details(movie_row)
            movie_list.append(movie)
        result = {"recommended_movies": movie_list}
        print(movie_list)
        return jsonify(result)
Example 3
def get_movie_details(row):
    movie_id = row[0]
    # Fetch up to four cast members for the movie.
    sql_query = ("SELECT person.name FROM person "
                 "JOIN person_junction ON person.id = person_junction.person_id "
                 "WHERE person_junction.role = 'cast' "
                 "AND person_junction.movie_id = %s")
    cursor.execute(sql_query, (movie_id,))
    casts = cursor.fetchmany(size=4)
    actors = [cast[0] for cast in casts]
    sql_query = "SELECT rating FROM ratings WHERE movie_id = %s"
    cursor.execute(sql_query, (movie_id,))
    rating_row = cursor.fetchone()
    rating = rating_row[0] if rating_row else None
    movie = {
        "id": row[0],
        "title": row[1],
        "year": row[2],
        "url": row[3],
        "plot": row[5][:200] + " ...",
        "genre": row[6],
        "language": row[7],
        "casts": actors,
        "rating": rating
    }
    return movie
Example 4
def _fetch_posts(epoch, town, cache=True):
    p = r.pipeline()
    posts = r.smembers(f'posts:{town}:{epoch}')
    hot_factors = []

    if len(posts) != 0:
        # posts is a set in redis cache. This is
        # to (naively) support concurrent caching of posts
        # without worrying about duplicate posts
        posts = list(posts)
        for i in range(len(posts)):
            posts[i] = posts[i].decode('utf-8')
            p.get(f'post:{posts[i]}:hot_factor')

        hot_factors = [None if elem is None else int(elem) for elem in p.execute()]
        _calculate_hot_factors_batch(posts, hot_factors)
        return posts, hot_factors

    posts = []
    cursor.execute('SELECT creator, creationTime, votes, "views" \
        FROM Posts WHERE town=? \
        AND creationTime BETWEEN ? and ?', town, epoch, epoch + TIME_BLOCK_SIZE - 1)
    
    row = cursor.fetchone()
    while row:
        post_id = _construct_post_id(row[0], row[1])
        posts.append(post_id)

        hf = calculate_hot_factor(row[1], row[2], row[3])
        hot_factors.append(hf)

        if cache:
            p.set(f'post:{post_id}:votes', row[2])
            p.set(f'post:{post_id}:views', row[3])
            p.set(f'post:{post_id}:hot_factor', hf)
            # force app to recalculate hot factor
            p.expire(f'post:{post_id}:hot_factor', HOT_FACTOR_EXPIRATION)

        row = cursor.fetchone()
    
    if cache:
        if len(posts) != 0:
            p.sadd(f'posts:{town}:{epoch}', *posts)
        p.execute()
    
    return posts, hot_factors
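
For context, epoch above is evidently the start of a fixed-size time bucket: the SQL filters on creationTime BETWEEN epoch AND epoch + TIME_BLOCK_SIZE - 1. A caller would therefore align a timestamp to the bucket boundary before calling _fetch_posts. A hedged sketch of such a caller; the helper name and the TIME_BLOCK_SIZE value are assumptions:

import time

TIME_BLOCK_SIZE = 3600  # assumed bucket width in seconds

def fetch_current_posts(town):
    # Align "now" to the start of its time block so it matches the
    # BETWEEN range used in _fetch_posts.
    now = int(time.time())
    epoch = now - (now % TIME_BLOCK_SIZE)
    return _fetch_posts(epoch, town)
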
Example 5
def get_setting(setting):

    # Get a setting from the database.
    # Note: interpolating the value directly into the SQL string is vulnerable
    # to SQL injection; the parameterized variant in the next example is safer.

    query = ("SELECT value FROM settings WHERE setting='%s' ORDER BY "
             "last_modified DESC LIMIT 1" % setting)
    cursor.execute(query)
    return cursor.fetchone()["value"]
Example 6
def get_setting(setting):

    # Get a setting from the database

    query = ("SELECT value FROM settings WHERE setting=%s ORDER BY "
             "last_modified DESC LIMIT 1")
    cursor.execute(query, (setting, ))
    return cursor.fetchone()["value"]
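
The two get_setting variants above differ in one important way: the first splices the value into the SQL text, so a crafted setting string can change the query itself, while the second hands the value to the driver as a bound parameter. A small, purely illustrative sketch of the difference (no database needed):

# Illustrative only: how a crafted value alters the interpolated query.
malicious = "x' OR '1'='1"

interpolated = ("SELECT value FROM settings WHERE setting='%s' ORDER BY "
                "last_modified DESC LIMIT 1" % malicious)
# interpolated is now:
#   SELECT value FROM settings WHERE setting='x' OR '1'='1' ORDER BY ...
# i.e. the injected OR clause matches every row.
print(interpolated)

parameterized = ("SELECT value FROM settings WHERE setting=%s ORDER BY "
                 "last_modified DESC LIMIT 1")
# cursor.execute(parameterized, (malicious,)) would send the whole string as a
# single literal value, so it simply matches no such setting.
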
Example 7
def get_favorite():
    if request.method == "POST":
        user_id = request.json["user_id"]
        sql_query = "SELECT movie_id FROM user_favorite WHERE user_id = %s"
        cursor.execute(sql_query, (user_id,))
        rows = cursor.fetchall()
        movie_list = []
        for movie_id in rows:
            movie_id = movie_id[0]
            sql_query = "SELECT * FROM movie WHERE id = %s"
            cursor.execute(sql_query, (movie_id,))
            movie_row = cursor.fetchone()
            movie = get_movie_details(movie_row)
            movie_list.append(movie)
        result = {"favorite_movies": movie_list}
        return jsonify(result)
Example 8
    def find_user(cls, username):
        try:
            select_query = """SELECT id, username, pwd
                                            FROM users
                                            WHERE username = %s"""

            cursor.execute(select_query, (username,))
            row = cursor.fetchone()

            if row:
                return cls(row['id'], row['username'], row['pwd'])
        except (Exception, psycopg2.Error) as error:
            connection.rollback()
            return {
                        'status': 500,
                        'message': json.dumps(error, default=str)
                    }, 500
Example 9
    def get(self, order_id):
        get_query = """SELECT *
                                FROM parcels
                                WHERE id = %s"""
        cursor.execute(get_query, (order_id, ))
        one_parcel = cursor.fetchone()

        if one_parcel is not None:
            return {
                'status': 200,
                'data': json.dumps(one_parcel,
                                   default=str,
                                   separators=(',', ': '))
            }, 200
        else:
            return {
                'message':
                "Parcel with id '{}' does not exist.".format(order_id)
            }, 404
Example 10
    def post(self):
        data = ParcelsList.parser.parse_args()

        create_parcels_table()
        insert_query = """INSERT INTO parcels (placedBy, weight, weightmetric, 
                            sentOn, deliveredOn, status, source, destination, currentlocation) 
                            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id;"""
        record_to_insert = list(data.values())

        cursor.execute(insert_query, record_to_insert)
        # The INSERT ends with RETURNING id, so the new row's id can be read
        # straight from the cursor; a separate SELECT LASTVAL() round trip is
        # unnecessary.
        row_id = cursor.fetchone()['id']
        connection.commit()

        return {
            'status': 201,
            'data': [{
                'id': row_id,
                'message': 'order created'
            }]
        }, 201
Example 11
def _calculate_hot_factors_batch(posts, hot_factors):
    # posts at these indices don't have hot factors
    # in cache (expired or never calculated)
    indices = []
    p = r.pipeline()

    for i in range(len(posts)):
        post = posts[i]
        if hot_factors[i] is None:
            indices.append(i)
            p.get(f'post:{post}:votes')
            p.get(f'post:{post}:views')

    stats = p.execute()
    inputs = []
    for i in range(0, len(stats), 2):
        post = posts[indices[i//2]]
        creator, creation_time = _deconstruct_post_id(post)

        # if votes or views doesn't exist
        if stats[i] is None or stats[i+1] is None:
            # kinda bothers me that there is no way to "batch"
            # SELECTs
            cursor.execute('SELECT votes, "views" FROM Posts\
                WHERE creator=? AND creationTime=?', creator, creation_time)
            row = cursor.fetchone()
            stats[i], stats[i+1] = row[0], row[1]
            p.set(f'post:{post}:votes', row[0])
            p.set(f'post:{post}:views', row[1])
        else:
            stats[i], stats[i+1] = int(stats[i]), int(stats[i+1])

        hf = calculate_hot_factor(creation_time, stats[i], stats[i+1])
        hot_factors[indices[i//2]] = hf

        p.set(f'post:{post}:hot_factor', hf)
        # reuse hot factor up to HOT_FACTOR_EXPIRATION
        p.expire(f'post:{post}:hot_factor', HOT_FACTOR_EXPIRATION)

    p.execute()
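
The comment in the loop above notes that there is no easy way to batch the per-post SELECTs. One common workaround is to fetch all of the missing rows in a single query and build a lookup table keyed by (creator, creationTime). A rough sketch under the same schema assumptions (Posts with creator, creationTime, votes and "views", and a ?-placeholder cursor); _fetch_stats_batch is a hypothetical helper, not part of the original code:

def _fetch_stats_batch(cursor, keys):
    # keys is a list of (creator, creationTime) pairs whose stats are missing
    # from the cache. Returns {(creator, creationTime): (votes, views)}.
    if not keys:
        return {}
    where = ' OR '.join(['(creator=? AND creationTime=?)'] * len(keys))
    params = [value for key in keys for value in key]
    cursor.execute(
        'SELECT creator, creationTime, votes, "views" FROM Posts WHERE ' + where,
        params)
    return {(row[0], row[1]): (row[2], row[3]) for row in cursor.fetchall()}
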
Example 12
def fill_empty_affiliations():

    # When a record is added, it has no affiliation data. Also, when an affiliation
    # mapping is changed via the UI, affiliation data will be set to NULL. This
    # function finds any records with NULL affiliation data and fills them.

    update_status('Filling empty affiliations')
    log_activity('Info', 'Filling empty affiliations')

    # Process any changes to the affiliations or aliases, and set any existing
    # entries in analysis_data to NULL so they are filled properly.

    # First, get the time we started fetching since we'll need it later

    cursor.execute("SELECT current_timestamp(6) as fetched")

    affiliations_fetched = cursor.fetchone()['fetched']

    # Now find the last time we worked on affiliations, to figure out what's new

    affiliations_processed = get_setting('affiliations_processed')

    get_changed_affiliations = ("SELECT domain FROM affiliations WHERE "
                                "last_modified >= %s")

    cursor_people.execute(get_changed_affiliations, (affiliations_processed, ))

    changed_affiliations = list(cursor_people)

    # Process any affiliations which changed since we last checked

    for changed_affiliation in changed_affiliations:

        log_activity(
            'Debug',
            'Resetting affiliation for %s' % changed_affiliation['domain'])

        set_author_to_null = (
            "UPDATE analysis_data SET author_affiliation = NULL "
            "WHERE author_email = %s")

        cursor.execute(set_author_to_null, (changed_affiliation['domain'], ))
        db.commit()

        set_committer_to_null = (
            "UPDATE analysis_data SET committer_affiliation = NULL "
            "WHERE committer_email = %s")

        cursor.execute(set_committer_to_null,
                       (changed_affiliation['domain'], ))
        db.commit()

    # Update the last fetched date, so we know where to start next time.

    update_affiliations_date = ("UPDATE settings SET value=%s "
                                "WHERE setting = 'affiliations_processed'")

    cursor.execute(update_affiliations_date, (affiliations_fetched, ))
    db.commit()

    # On to the aliases, now

    # First, get the time we started fetching since we'll need it later

    cursor.execute("SELECT current_timestamp(6) as fetched")

    aliases_fetched = cursor.fetchone()['fetched']

    # Now find the last time we worked on aliases, to figure out what's new

    aliases_processed = get_setting('aliases_processed')

    get_changed_aliases = ("SELECT alias FROM aliases WHERE "
                           "last_modified >= %s")

    cursor_people.execute(get_changed_aliases, (aliases_processed, ))

    changed_aliases = list(cursor_people)

    # Process any aliases which changed since we last checked

    for changed_alias in changed_aliases:

        log_activity('Debug',
                     'Resetting affiliation for %s' % changed_alias['alias'])

        set_author_to_null = (
            "UPDATE analysis_data SET author_affiliation = NULL "
            "WHERE author_raw_email = %s")

        cursor.execute(set_author_to_null, (changed_alias['alias'], ))
        db.commit()

        set_committer_to_null = (
            "UPDATE analysis_data SET committer_affiliation = NULL "
            "WHERE committer_raw_email = %s")

        cursor.execute(set_committer_to_null, (changed_alias['alias'], ))
        db.commit()

        reset_author = ("UPDATE analysis_data "
                        "SET author_email = %s "
                        "WHERE author_raw_email = %s")

        cursor.execute(
            reset_author,
            (discover_alias(changed_alias['alias']), changed_alias['alias']))
        db.commit()

        reset_committer = ("UPDATE analysis_data "
                           "SET committer_email = %s "
                           "WHERE committer_raw_email = %s")

        cursor.execute(
            reset_committer,
            (discover_alias(changed_alias['alias']), changed_alias['alias']))
        db.commit()

    # Update the last fetched date, so we know where to start next time.

    update_aliases_date = ("UPDATE settings SET value=%s "
                           "WHERE setting = 'aliases_processed'")

    cursor.execute(update_aliases_date, (aliases_fetched, ))
    db.commit()

    # Now rebuild the affiliation data

    working_author = get_setting('working_author').replace("'", "\\'")

    if working_author != 'done':
        log_activity(
            'Error',
            'Trimming author data in affiliations: %s' % working_author)
        trim_author(working_author)

    # Find any authors with NULL affiliations and fill them

    find_null_authors = ("SELECT DISTINCT author_email AS email, "
                         "MIN(author_date) AS earliest "
                         "FROM analysis_data "
                         "WHERE author_affiliation IS NULL "
                         "GROUP BY author_email")

    cursor.execute(find_null_authors)

    null_authors = list(cursor)

    log_activity('Debug',
                 'Found %s authors with NULL affiliation' % len(null_authors))

    for null_author in null_authors:

        email = null_author['email'].replace("'", "\\'")

        store_working_author(email)

        discover_null_affiliations('author', email)

    store_working_author('done')

    # Find any committers with NULL affiliations and fill them

    find_null_committers = ("SELECT DISTINCT committer_email AS email, "
                            "MIN(committer_date) AS earliest "
                            "FROM analysis_data "
                            "WHERE committer_affiliation IS NULL "
                            "GROUP BY committer_email")

    cursor.execute(find_null_committers)

    null_committers = list(cursor)

    log_activity(
        'Debug',
        'Found %s committers with NULL affiliation' % len(null_committers))

    for null_committer in null_committers:

        email = null_committer['email'].replace("'", "\\'")

        store_working_author(email)

        discover_null_affiliations('committer', email)

    # Now that we've matched as much as possible, fill the rest as (Unknown)

    fill_unknown_author = ("UPDATE analysis_data "
                           "SET author_affiliation = '(Unknown)' "
                           "WHERE author_affiliation IS NULL")

    cursor.execute(fill_unknown_author)
    db.commit()

    fill_unknown_committer = ("UPDATE analysis_data "
                              "SET committer_affiliation = '(Unknown)' "
                              "WHERE committer_affiliation IS NULL")

    cursor.execute(fill_unknown_committer)
    db.commit()

    store_working_author('done')

    log_activity('Info', 'Filling empty affiliations (complete)')
Example 13
def analysis():

    # Run the analysis by looping over all active repos. For each repo, we retrieve
    # the list of commits which lead to HEAD. If any are missing from the database,
    # they are filled in. Then we check to see if any commits in the database are
    # not in the list of parents, and prune them out.
    #
    # We also keep track of the last commit to be processed, so that if the analysis
    # is interrupted (possibly leading to partial data in the database for the
    # commit being analyzed at the time) we can recover.

    update_status('Running analysis')
    log_activity('Info', 'Beginning analysis')

    start_date = get_setting('start_date')

    repo_list = "SELECT id,projects_id,path,name FROM repos WHERE status='Active'"
    cursor.execute(repo_list)
    repos = list(cursor)

    for repo in repos:

        update_analysis_log(repo['id'], 'Beginning analysis')
        log_activity('Verbose',
                     'Analyzing repo: %s (%s)' % (repo['id'], repo['name']))

        # First we check to see if the previous analysis didn't complete

        get_status = ("SELECT working_commit FROM repos WHERE id=%s")

        cursor.execute(get_status, (repo['id'], ))
        working_commit = cursor.fetchone()['working_commit']

        # If there's a commit still there, the previous run was interrupted and
        # the commit data may be incomplete. It should be trimmed, just in case.
        if working_commit:
            trim_commit(repo['id'], working_commit)
            store_working_commit(repo['id'], '')

        # Start the main analysis

        update_analysis_log(repo['id'], 'Collecting data')

        repo_loc = ('%s%s/%s%s/.git' %
                    (repo_base_directory, repo["projects_id"], repo["path"],
                     repo["name"]))
        # Grab the parents of HEAD

        parents = subprocess.Popen([
            "git --git-dir %s log --ignore-missing "
            "--pretty=format:'%%H' --since=%s" % (repo_loc, start_date)
        ],
                                   stdout=subprocess.PIPE,
                                   shell=True)

        # git prints newline-separated hashes as bytes; decode before splitting.
        parent_commits = set(
            parents.stdout.read().decode('utf-8').split('\n'))

        # If there are no commits in the range, we still get a blank entry in
        # the set. Remove it, as it messes with the calculations

        if '' in parent_commits:
            parent_commits.remove('')

        # Grab the existing commits from the database

        existing_commits = set()

        find_existing = (
            "SELECT DISTINCT commit FROM analysis_data WHERE repos_id=%s")

        cursor.execute(find_existing, (repo['id'], ))

        for commit in list(cursor):
            existing_commits.add(commit['commit'])

        # Find missing commits and add them

        missing_commits = parent_commits - existing_commits

        log_activity(
            'Debug', 'Commits missing from repo %s: %s' %
            (repo['id'], len(missing_commits)))

        for commit in missing_commits:

            store_working_commit(repo['id'], commit)

            analyze_commit(repo['id'], repo_loc, commit)

            store_working_commit(repo['id'], '')

        update_analysis_log(repo['id'], 'Data collection complete')

        update_analysis_log(repo['id'], 'Beginning to trim commits')

        # Find commits which are out of the analysis range

        trimmed_commits = existing_commits - parent_commits

        log_activity(
            'Debug', 'Commits to be trimmed from repo %s: %s' %
            (repo['id'], len(trimmed_commits)))

        for commit in trimmed_commits:

            trim_commit(repo['id'], commit)

        update_analysis_log(repo['id'], 'Commit trimming complete')

        update_analysis_log(repo['id'], 'Analysis complete')

    log_activity('Info', 'Running analysis (complete)')
Example 14
import csv

from db import cursor
# Haishoku is used below for palette extraction; this assumes the package's
# usual import path.
from haishoku.haishoku import Haishoku

# NOTE: `face` and `color` are assumed to be dicts defined elsewhere; they are
# used below without being initialized in this snippet.

s = set()
c = csv.reader(
    open('E:/source/bili_history_sim/resimed.csv', 'r', encoding="utf-8-sig"))
for each in c:
    s.add(each[0])

with open('./get_data/face.py', 'w', encoding="utf-8-sig") as f:
    f.write('face = ' + str(face))

authors = []
for each_author in s:
    authors.append(each_author)
    if each_author not in face:
        # The parameter must be passed as a one-element tuple.
        cursor.execute("""SELECT * FROM author WHERE name=%s""", (each_author,))
        author_data = cursor.fetchone()
        face[each_author] = author_data['face']
with open('./get_data/face.py', 'w', encoding="utf-8-sig") as f:
    f.write('face = ' + str(face))

for each_author in face:
    if each_author in color:
        continue
    if face[each_author][-3:] == 'gif' or each_author == '开眼视频App':
        color[each_author] = '#000000'
    else:
        color_list = Haishoku.getPalette(face[each_author])
        color_list = sorted(color_list,
                            key=lambda x: x[1][0] + x[1][1] + x[1][2])
        color[each_author] = 'rgb' + \