Пример #1
0
def swapCodeForTokens(response):
    """Exchange an imgur authorization code for tokens and store them.

    Called at the server redirect when imgur returns the code.

    Args:
        response: dict of the redirect's parameters, like
            ``{'error': 'blah blah', 'state': ...}`` or
            ``{'code': 'blah blah', 'state': 'blah blah'}``. ``state`` must be
            a JSON document with the keys ``id`` and ``redir``.

    Returns:
        ``{'success': True, 'redir': <redir>}`` when both an access token and
        a refresh token were obtained and stored; otherwise
        ``{'success': False}``, in which case ``imgur_json`` is reset to NULL
        for the matching ``imgur_id``.
    """
    # Parse the state before touching the database so malformed input cannot
    # leave a connection open.
    state = json.loads(response['state'])
    user_identifier = state['id']
    redir = state['redir']

    credentials = None
    if 'error' not in response:
        client = ImgurClient(app.config['IMGUR_CLIENTID'],
                             app.config['IMGUR_SECRET'])
        credentials = client.authorize(response['code'], 'authorization_code')
    authorized = (credentials is not None
                  and 'access_token' in credentials
                  and 'refresh_token' in credentials)

    # A single connection serves every outcome and is always closed.
    db = connect_db()
    try:
        c = db.cursor()
        if authorized:
            c.execute(
                'UPDATE users SET imgur_json=' + app.sqlesc +
                ' WHERE imgur_id=' + app.sqlesc,
                (json.dumps(credentials), user_identifier))
        else:
            c.execute(
                'UPDATE users SET imgur_json=NULL WHERE imgur_id=' +
                app.sqlesc, (user_identifier,))
        db.commit()
    finally:
        db.close()

    if authorized:
        return {'success': True, 'redir': redir}
    return {'success': False}
Пример #2
0
def swapCodeForTokens(response):
    """Exchange an imgur authorization code for tokens and store them.

    Called at the server redirect when imgur returns the code.

    Args:
        response: dict of the redirect's parameters, like
            ``{'error': 'blah blah', 'state': ...}`` or
            ``{'code': 'blah blah', 'state': 'blah blah'}``. ``state`` must be
            a JSON document with the keys ``id`` and ``redir``.

    Returns:
        ``{'success': True, 'redir': <redir>}`` when both an access token and
        a refresh token were obtained and stored; otherwise
        ``{'success': False}``, in which case ``imgur_json`` is reset to NULL
        for the matching ``imgur_id``.
    """
    # Parse the state before touching the database so malformed input cannot
    # leave a connection open.
    state = json.loads(response['state'])
    user_identifier = state['id']
    redir = state['redir']

    credentials = None
    if 'error' not in response:
        client = ImgurClient(app.config['IMGUR_CLIENTID'],
                             app.config['IMGUR_SECRET'])
        credentials = client.authorize(response['code'], 'authorization_code')
    authorized = (credentials is not None
                  and 'access_token' in credentials
                  and 'refresh_token' in credentials)

    # A single connection serves every outcome and is always closed.
    db = connect_db()
    try:
        c = db.cursor()
        if authorized:
            c.execute(
                'UPDATE users SET imgur_json=' + app.sqlesc +
                ' WHERE imgur_id=' + app.sqlesc,
                (json.dumps(credentials), user_identifier))
        else:
            c.execute(
                'UPDATE users SET imgur_json=NULL WHERE imgur_id=' +
                app.sqlesc, (user_identifier,))
        db.commit()
    finally:
        db.close()

    if authorized:
        return {'success': True, 'redir': redir}
    return {'success': False}
Пример #3
0
 def setUpClass(cls) -> None:
     """Create the application once for the test class and connect its database."""
     print('setUpClass')
     flask_app = create_app()
     test_settings = {
         'TESTING': True,
         'SQLALCHEMY_DATABASE_URI':
         'mysql+cymysql://root:159951@localhost:3306/test?charset=utf8',
         'SQLALCHEMY_ENCODING': 'utf-8',
         # Suppresses SQLAlchemy's FSADeprecationWarning.
         'SQLALCHEMY_TRACK_MODIFICATIONS': False,
     }
     flask_app.config.update(**test_settings)
     cls.app = flask_app
     cls.client = flask_app.test_client()
     # connect_db is called inside an application context.
     with cls.app.app_context():
         connect_db(flask_app)
Пример #4
0
def checkApiAccess(userid):
    """Check whether we hold imgur API keys for a user and whether they work.

    Args:
        userid: value matched against ``users.id``.

    Returns:
        True if the stored tokens are accepted by imgur and more than 10
        client and user credits remain; None if the tokens work but credits
        are low; False if there is no usable stored token pair or imgur
        rejects the request.
    """
    db = connect_db()
    try:
        c = db.cursor()
        c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc,
                  (userid, ))
        r = c.fetchone()
    finally:
        db.close()

    # fetchone() yields None when no row matches, so test truthiness instead
    # of calling len() on it.
    if not r:
        return False
    try:
        stored = json.loads(r[0])
        access_token = stored['access_token']
        refresh_token = stored['refresh_token']
    except (TypeError, ValueError, KeyError):
        # NULL column, malformed JSON, or a token missing from the document.
        return False

    client = ImgurClient(app.config['IMGUR_CLIENTID'],
                         app.config['IMGUR_SECRET'])
    client.set_user_auth(access_token, refresh_token)
    try:
        # presumably performs an authenticated request so that bad tokens
        # raise ImgurClientError -- verify against the imgur client library
        client.get_account('me').url
        credits = client.credits
    except ImgurClientError:
        return False
    if credits['ClientRemaining'] > 10 and credits['UserRemaining'] > 10:
        return True
    return None
Пример #5
0
def checkApiAccess(userid):
    """Check whether we hold imgur API keys for a user and whether they work.

    Args:
        userid: value matched against ``users.id``.

    Returns:
        True if the stored tokens are accepted by imgur and more than 10
        client and user credits remain; None if the tokens work but credits
        are low; False if there is no usable stored token pair or imgur
        rejects the request.
    """
    db = connect_db()
    try:
        c = db.cursor()
        c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc,
                  (userid, ))
        r = c.fetchone()
    finally:
        db.close()

    # fetchone() yields None when no row matches, so test truthiness instead
    # of calling len() on it.
    if not r:
        return False
    try:
        stored = json.loads(r[0])
        access_token = stored['access_token']
        refresh_token = stored['refresh_token']
    except (TypeError, ValueError, KeyError):
        # NULL column, malformed JSON, or a token missing from the document.
        return False

    client = ImgurClient(app.config['IMGUR_CLIENTID'],
                         app.config['IMGUR_SECRET'])
    client.set_user_auth(access_token, refresh_token)
    try:
        # presumably performs an authenticated request so that bad tokens
        # raise ImgurClientError -- verify against the imgur client library
        client.get_account('me').url
        credits = client.credits
    except ImgurClientError:
        return False
    if credits['ClientRemaining'] > 10 and credits['UserRemaining'] > 10:
        return True
    return None
Пример #6
0
def uploadToImgur(userid, url):
    """Upload a farm's map image to imgur on behalf of a user.

    Args:
        userid: value matched against ``users.id`` to load the imgur tokens.
        url: value matched against ``playerinfo.url`` to find the farm.

    Returns:
        ``{'error': 'too_soon', 'link': <previous imgur url>}`` if the farm
        was uploaded less than two hours ago;
        ``{'error': 'upload_issue', 'link': None}`` if the upload result
        lacks an ``id`` or ``link``;
        ``{'success': None, 'link': <imgur link>}`` otherwise.
    """
    db = connect_db()
    try:
        c = db.cursor()
        c.execute(
            'SELECT map_url,name,farmname,date,imgur_json FROM playerinfo WHERE url='
            + app.sqlesc, (url, ))
        result = c.fetchone()
        if result[4] is not None:
            previous_upload_properties = json.loads(result[4])
            # Refuse re-uploads within two hours of the previous one.
            if time.time() < previous_upload_properties['upload_time'] + 2 * 3600:
                return {
                    'error': 'too_soon',
                    'link': previous_upload_properties['imgur_url']
                }
        map_url = result[0]
        titlestring = u"{} Farm, {} by {}".format(result[2], result[3],
                                                  result[1])
        descriptionstring = u"Stardew Valley game progress, full summary at http://upload.farm/{}".format(
            url)

        c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc,
                  (userid, ))
        r = json.loads(c.fetchone()[0])
        client = ImgurClient(app.config['IMGUR_CLIENTID'],
                             app.config['IMGUR_SECRET'])
        client.set_user_auth(r['access_token'], r['refresh_token'])

        upload_config = {
            'title': titlestring,
            'description': descriptionstring
        }
        # Kept as an equality test so only True (or 1) selects direct upload,
        # exactly as before.
        if app.config['IMGUR_DIRECT_UPLOAD'] == True:
            result = client.upload_from_path(map_url,
                                             config=upload_config,
                                             anon=False)
        else:
            map_url = u"http://upload.farm/{}".format(map_url)
            result = client.upload_from_url(map_url,
                                            config=upload_config,
                                            anon=False)
        print(result)

        # Validate the upload result before recording it; previously a
        # missing 'id' raised before the upload_issue fallback could apply.
        try:
            imgur_id = result['id']
            link = result['link']
        except (KeyError, TypeError):
            return {'error': 'upload_issue', 'link': None}

        imgur_json = json.dumps({
            'imgur_url': 'http://imgur.com/' + imgur_id,
            'upload_time': time.time()
        })
        c.execute(
            'UPDATE playerinfo SET imgur_json=' + app.sqlesc + ' WHERE url=' +
            app.sqlesc, (imgur_json, url))
        db.commit()
        return {'success': None, 'link': link}
    finally:
        db.close()
Пример #7
0
def uploadToImgur(userid, url):
    """Upload a farm's map image to imgur on behalf of a user.

    Args:
        userid: value matched against ``users.id`` to load the imgur tokens.
        url: value matched against ``playerinfo.url`` to find the farm.

    Returns:
        ``{'error': 'too_soon', 'link': <previous imgur url>}`` if the farm
        was uploaded less than two hours ago;
        ``{'error': 'upload_issue', 'link': None}`` if the upload result
        lacks an ``id`` or ``link``;
        ``{'success': None, 'link': <imgur link>}`` otherwise.
    """
    db = connect_db()
    try:
        c = db.cursor()
        c.execute(
            'SELECT map_url,name,farmname,date,imgur_json FROM playerinfo WHERE url='
            + app.sqlesc, (url, ))
        result = c.fetchone()
        if result[4] is not None:
            previous_upload_properties = json.loads(result[4])
            # Refuse re-uploads within two hours of the previous one.
            if time.time() < previous_upload_properties['upload_time'] + 2 * 3600:
                return {
                    'error': 'too_soon',
                    'link': previous_upload_properties['imgur_url']
                }
        map_url = result[0]
        titlestring = u"{} Farm, {} by {}".format(result[2], result[3],
                                                  result[1])
        descriptionstring = u"Stardew Valley game progress, full summary at http://upload.farm/{}".format(
            url)

        c.execute('SELECT imgur_json FROM users WHERE id=' + app.sqlesc,
                  (userid, ))
        r = json.loads(c.fetchone()[0])
        client = ImgurClient(app.config['IMGUR_CLIENTID'],
                             app.config['IMGUR_SECRET'])
        client.set_user_auth(r['access_token'], r['refresh_token'])

        upload_config = {
            'title': titlestring,
            'description': descriptionstring
        }
        # Kept as an equality test so only True (or 1) selects direct upload,
        # exactly as before.
        if app.config['IMGUR_DIRECT_UPLOAD'] == True:
            result = client.upload_from_path(map_url,
                                             config=upload_config,
                                             anon=False)
        else:
            map_url = u"http://upload.farm/{}".format(map_url)
            result = client.upload_from_url(map_url,
                                            config=upload_config,
                                            anon=False)
        print(result)

        # Validate the upload result before recording it; previously a
        # missing 'id' raised before the upload_issue fallback could apply.
        try:
            imgur_id = result['id']
            link = result['link']
        except (KeyError, TypeError):
            return {'error': 'upload_issue', 'link': None}

        imgur_json = json.dumps({
            'imgur_url': 'http://imgur.com/' + imgur_id,
            'upload_time': time.time()
        })
        c.execute(
            'UPDATE playerinfo SET imgur_json=' + app.sqlesc + ' WHERE url=' +
            app.sqlesc, (imgur_json, url))
        db.commit()
        return {'success': None, 'link': link}
    finally:
        db.close()
Пример #8
0
def getEntries(where=None):
    """Fetch id, md5, url and savefileLocation for playerinfo rows.

    Args:
        where: optional raw SQL appended after the table name (for example a
            ``WHERE ...`` clause). None selects every row.

    Returns:
        The result of ``fetchall()`` for the query.
    """
    if where is None:
        where = ''
    connection = connect_db()
    try:
        c = connection.cursor()
        # NOTE(review): `where` is spliced into the statement as raw SQL, so
        # callers must only pass trusted, program-generated clauses.
        c.execute('SELECT id,md5,url,savefileLocation FROM playerinfo ' + where)
        return c.fetchall()
    finally:
        # Close the connection even when the query raises.
        connection.close()
Пример #9
0
def getAuthUrl(userid, target=None):
    """Build the imgur authorization URL for a user.

    A fresh UUID is stored in ``users.imgur_id`` for ``userid`` and sent to
    imgur inside the ``state`` parameter together with ``target``.

    Args:
        userid: value matched against ``users.id``.
        target: value carried in the state under the key ``redir``.

    Returns:
        The authorization URL with the percent-encoded JSON state appended.
    """
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    iuid = unicode(uuid.uuid4())
    imgur_id = json.dumps({'id': iuid, 'redir': target})

    db = connect_db()
    try:
        c = db.cursor()
        c.execute(
            'UPDATE users SET imgur_id=' + app.sqlesc + ' WHERE id=' +
            app.sqlesc, (iuid, userid))
        db.commit()
    finally:
        db.close()

    client = ImgurClient(app.config['IMGUR_CLIENTID'],
                         app.config['IMGUR_SECRET'])
    # The state is JSON (quotes, braces, spaces, and possibly '&' or '#'
    # inside `target`), so it must be percent-encoded to survive as a single
    # query parameter.
    encoded_state = unicode(quote(imgur_id, safe=''))
    authorization_url = client.get_auth_url('code') + '&state=' + encoded_state
    return authorization_url
Пример #10
0
def fetch_tags():
    """Grab all tags from Math.StackExchange, and add to our database.

    Pages through the ``/tags`` endpoint, 100 tags per page in ascending name
    order, handing the work to ``process_each_page`` with ``insert_tags`` as
    the callback.
    """
    # The connection and cursor previously opened here were never used or
    # closed, so they have been removed.
    func = "/tags"
    params = {
        'pagesize': 100,
        'order': 'asc',
        'sort': 'name'
    }

    process_each_page(func, params, insert_tags)
Пример #11
0
    def setUp(self):
        """Prepare a blank test database and a test client before each test."""
        test_db_uri = os.environ['TEST_DATABASE_URL']
        self.db_name = name_from_uri(test_db_uri)

        # Point the application at the test database.
        settings = {
            'TESTING': True,
            'SQLALCHEMY_DATABASE_URI': test_db_uri,
        }
        app.app.config.update(**settings)
        upgrade(directory='migrations')

        self.app = app.app.test_client()
        app.init_db(self.db_name)
        self.cursor = app.connect_db()
Пример #12
0
def processFile(filename, old_md5, rowid, url):
    """Re-process a stored save file and refresh its playerinfo row.

    Args:
        filename: path of the save file to read.
        old_md5: checksum the file is expected to have.
        rowid: ``playerinfo.id`` of the row to update.
        url: unused by this function; kept for interface compatibility.

    Returns:
        True when the row was updated and a ``process_image`` task queued;
        False when the checksum does not match or the database rejects the
        update (the failure is recorded in the ``errors`` table).
    """
    with open(filename, 'rb') as f:
        md5_info = md5(f)
    # Explicit comparison instead of `assert`, which is stripped under -O.
    # The message is emitted before returning; it used to be unreachable.
    if md5_info != old_md5:
        print('{} failed md5'.format(filename))
        return False

    player_info = playerInfo(filename)
    farm_info = getFarmInfo(filename)

    # Flatten player_info into parallel column/value lists: list items get an
    # index suffix, nested dict keys are concatenated onto the parent key.
    columns = []
    values = []
    for key in player_info.keys():
        entry = player_info[key]
        if type(entry) == list:
            for i, item in enumerate(entry):
                columns.append(key.replace(' ', '_') + str(i))
                values.append(str(item))
        elif type(entry) == dict:
            for subkey in entry:
                subentry = entry[subkey]
                if type(subentry) == dict:
                    for subsubkey in subentry:
                        columns.append(
                            (key + subkey + subsubkey).replace(' ', '_'))
                        values.append(subentry[subsubkey])
                else:
                    columns.append((key + subkey).replace(' ', '_'))
                    values.append(str(subentry))
        else:
            columns.append(key)
            values.append(str(entry))
    columns.append('farm_info')
    values.append(json.dumps(farm_info))
    columns.append('failed_processing')
    values.append(None)

    # NOTE(review): column names are spliced into the SQL text; they come
    # from the keys of player_info, so those keys must be trusted.
    colstring = ', '.join(columns)
    questionmarks = ','.join([sqlesc] * len(values))

    connection = connect_db()
    try:
        cur = connection.cursor()
        try:
            cur.execute(
                'UPDATE playerinfo SET (' + colstring + ') = (' +
                questionmarks + ') WHERE id=' + sqlesc,
                tuple(values + [rowid]))
            cur.execute(
                'INSERT INTO todo (task, playerid) VALUES (' + sqlesc + ',' +
                sqlesc + ')', ('process_image', rowid))
            connection.commit()
            return True
        except (sqlite3.OperationalError, psycopg2.ProgrammingError) as e:
            # Discard the failed transaction first so the error row can be
            # written on a clean one.
            connection.rollback()
            cur.execute(
                'INSERT INTO errors (ip, time, notes) VALUES (' + sqlesc +
                ',' + sqlesc + ',' + sqlesc + ')',
                ('reprocessEntry.py', time.time(),
                 str(e) + ' ' + str([columns, values])))
            connection.commit()
            return False
    finally:
        connection.close()
Пример #13
0
def set_last_updated():
    """Record the current UTC time as the last update of the question data.

    Inserts the 'questions' row into ``last_updated`` when the lookup reports
    zero rows, otherwise updates its date.
    """
    connection = connect_db()
    cursor = connection.cursor()
    now_str = dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

    # The return value of execute() is compared with 0 to detect a missing row.
    matched = cursor.execute(
        "select * from last_updated where description='questions';")
    if matched != 0:
        cursor.execute(
            "update last_updated set date=%s where description='questions';",
            [now_str])
    else:
        cursor.execute(
            "insert into last_updated (description, date) values(%s, %s);",
            ['questions', now_str])

    connection.commit()
    connection.close()
Пример #14
0
def set_last_updated():
    """Record the current UTC time as the last update of the question data.

    Inserts the 'questions' row into ``last_updated`` when the lookup reports
    zero rows, otherwise updates its date.
    """
    connection = connect_db()
    cursor = connection.cursor()
    now_str = dt.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

    # The return value of execute() is compared with 0 to detect a missing row.
    matched = cursor.execute(
        "select * from last_updated where description='questions';")
    if matched != 0:
        cursor.execute(
            "update last_updated set date=%s where description='questions';",
            [now_str])
    else:
        cursor.execute(
            "insert into last_updated (description, date) values(%s, %s);",
            ['questions', now_str])

    connection.commit()
    connection.close()
Пример #15
0
def getAuthUrl(userid, target=None):
    """Build the imgur authorization URL for a user.

    A fresh UUID is stored in ``users.imgur_id`` for ``userid`` and sent to
    imgur inside the ``state`` parameter together with ``target``.

    Args:
        userid: value matched against ``users.id``.
        target: value carried in the state under the key ``redir``.

    Returns:
        The authorization URL with the percent-encoded JSON state appended.
    """
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    iuid = unicode(uuid.uuid4())
    imgur_id = json.dumps({'id': iuid, 'redir': target})

    db = connect_db()
    try:
        c = db.cursor()
        c.execute(
            'UPDATE users SET imgur_id=' + app.sqlesc + ' WHERE id=' +
            app.sqlesc, (iuid, userid))
        db.commit()
    finally:
        db.close()

    client = ImgurClient(app.config['IMGUR_CLIENTID'],
                         app.config['IMGUR_SECRET'])
    # The state is JSON (quotes, braces, spaces, and possibly '&' or '#'
    # inside `target`), so it must be percent-encoded to survive as a single
    # query parameter.
    encoded_state = unicode(quote(imgur_id, safe=''))
    authorization_url = client.get_auth_url('code') + '&state=' + encoded_state
    return authorization_url
Пример #16
0
def launched_by_known_user(context):
    """Send a LaunchRequest to /alexa/ after storing a location for the test user.

    The response is stored on ``context.response`` and asserted to be truthy.
    """
    with app.app_context():
        g.db = connect_db()
        known_user = AlexaUser(TEST_USER_ID)
        latitude = TEST_USER_DATA['latitude']
        longitude = TEST_USER_DATA['longitude']
        city = TEST_USER_DATA['city']
        known_user.set_location(latitude, longitude, city)

    launch_request = base_alexa_request('LaunchRequest')
    #    with patch('app.alexa.AlexaUser.get_data_by_id') as mock_gdbi:
    #        mock_gdbi.return_value = TEST_USER_DATA
    request_body = json.dumps(launch_request)
    context.response = context.client.post(
        '/alexa/',
        data=request_body,
        content_type='application/json',
    )
    assert context.response
Пример #17
0
def get_last_updated():
    """Return the stored datetime of the last update to the question list.

    When no 'questions' row exists, ``dt.fromtimestamp(0)`` is stored as the
    initial value and returned instead.
    """
    connection = connect_db()
    cursor = connection.cursor()

    # The return value of execute() is compared with 0 to detect a stored row.
    found = cursor.execute(
        "SELECT * FROM last_updated WHERE description='questions'")
    if found > 0:
        ts = cursor.fetchone()['date']
    else:
        ts = dt.fromtimestamp(0)
        insert_sql = ("INSERT INTO last_updated (description, date) VALUES \n"
                      "        ('questions', %s);")
        cursor.execute(insert_sql, [ts])

    connection.commit()
    connection.close()
    return ts
Пример #18
0
def launched_by_known_user(context):
    """Send a LaunchRequest to /alexa/ after storing a location for the test user.

    The response is stored on ``context.response`` and asserted to be truthy.
    """
    with app.app_context():
        g.db = connect_db()
        known_user = AlexaUser(TEST_USER_ID)
        latitude = TEST_USER_DATA['latitude']
        longitude = TEST_USER_DATA['longitude']
        city = TEST_USER_DATA['city']
        known_user.set_location(latitude, longitude, city)

    launch_request = base_alexa_request('LaunchRequest')
    #    with patch('app.alexa.AlexaUser.get_data_by_id') as mock_gdbi:
    #        mock_gdbi.return_value = TEST_USER_DATA
    request_body = json.dumps(launch_request)
    context.response = context.client.post(
        '/alexa/',
        data=request_body,
        content_type='application/json',
    )
    assert context.response
Пример #19
0
def get_last_updated():
    """Return the stored datetime of the last update to the question list.

    When no 'questions' row exists, ``dt.fromtimestamp(0)`` is stored as the
    initial value and returned instead.
    """
    connection = connect_db()
    cursor = connection.cursor()

    # The return value of execute() is compared with 0 to detect a stored row.
    found = cursor.execute(
        "SELECT * FROM last_updated WHERE description='questions'")
    if found > 0:
        ts = cursor.fetchone()['date']
    else:
        ts = dt.fromtimestamp(0)
        insert_sql = ("INSERT INTO last_updated (description, date) VALUES \n"
                      "        ('questions', %s);")
        cursor.execute(insert_sql, [ts])

    connection.commit()
    connection.close()
    return ts
Пример #20
0
def create_user(username='******', password='******', email='*****@*****.**',
                active=False):
    """Initialise the database and insert one user through a SQL script.

    ``active`` selects the CREATE_ACTIVE_USER template instead of CREATE_USER.
    The password is passed through ``get_password_for_database`` before it is
    placed into the template.
    """
    app.init_db()

    stored_password = get_password_for_database(password)
    template = CREATE_ACTIVE_USER if active else CREATE_USER

    # NOTE(review): the values are %-interpolated straight into the SQL text,
    # so this helper is only safe with trusted fixture values.
    script = template % {
        'username': username,
        'password': stored_password,
        'email': email,
    }

    connection = app.connect_db()
    connection.executescript(script)
    connection.commit()
Пример #21
0
def create_tag_categories():
    """Cluster MSE tags into categories using sklearn AffinityPropagation.

    Tags are clustered on their pairwise co-occurrence counts across
    questions; a fixed list of meta tags is excluded. Each category is named
    after the tag at its cluster centre.

    Any existing category system in the database will be overwritten.
    """
    con = connect_db()
    try:
        cur = con.cursor()

        query = """
        SELECT T.id, T.name, COUNT(Q.question_id) AS count FROM
        (
            SELECT tags.id, tags.name, COUNT(qt.question_id) AS count FROM tags
            JOIN question_tags AS qt ON qt.tag_id=tags.id
            WHERE tags.name NOT IN ('advice', 'applications', 'big-list',
            'education', 'intuition', 'learning', 'math-history', 'math-software',
            'reference-request', 'self-learning', 'soft-question', 'teaching',
            'alternative-proof-strategy', 'proof-writing', 'visualization',
            'alternative-proof', 'proof-strategy', 'proof-verification',
            'solution-verification', 'definition', 'examples-counterexamples',
            'mathematica', 'wolfram-alpha', 'maple', 'matlab', 'sage', 'octave',
            'floor-function', 'ceiling-function', 'article-writing', 'publishing',
            'combinatorial-species', 'gromov-hyperbolic-spaces', 'chemistry',
            'book-recommendation')
            GROUP BY tags.name
        ) AS T
        JOIN question_tags AS Q ON T.id=Q.tag_id
        GROUP BY T.id"""
        cur.execute(query)

        # tag_indices maps a tag id to its row/column in the similarity matrix.
        tag_ids = []
        tag_names = []
        tag_indices = dict()
        for q in cur:
            tag_indices[q['id']] = len(tag_ids)
            tag_ids.append(q['id'])
            tag_names.append(q['name'])

        tag_ids = np.array(tag_ids)
        tag_names = np.array(tag_names)

        query = """
        SELECT t1.id AS tag1, t2.id AS tag2, COUNT(qt1.question_id) as count
        FROM question_tags AS qt1
        JOIN question_tags AS qt2 ON qt1.question_id=qt2.question_id
        JOIN tags AS t1 ON t1.id=qt1.tag_id
        JOIN tags AS t2 ON t2.id=qt2.tag_id
        WHERE t1.id IN ({taglist}) AND t2.id IN ({taglist})
        GROUP BY t1.name, t2.name""".format(
            taglist=','.join(str(i) for i in tag_ids))
        cur.execute(query)

        # np.float64 replaces the alias np.float_, which NumPy 2.0 removed.
        sim = np.zeros((len(tag_ids), len(tag_ids)), dtype=np.float64)
        for q in cur:
            i1 = tag_indices[q['tag1']]
            i2 = tag_indices[q['tag2']]
            c = q['count']
            if i1 == i2:
                sim[i1, i1] = int(c / 2)
            else:
                sim[i1, i2] = c

        cluster = AffinityPropagation(affinity='precomputed', damping=0.5)
        labels = cluster.fit_predict(sim)
        classes = sorted(set(labels))

        cur.execute("DELETE FROM categories WHERE 1;")
        cur.execute("DELETE FROM tag_categories WHERE 1;")

        # Values are converted to plain int/str and sent as parameters rather
        # than being formatted into the SQL text.
        category_rows = [
            (int(c), str(tag_names[cluster.cluster_centers_indices_[c]]))
            for c in classes
        ]
        cur.executemany(
            "INSERT INTO categories (id,name) VALUES (%s,%s)", category_rows)

        membership_rows = [
            (int(tag_id), int(label)) for tag_id, label in zip(tag_ids, labels)
        ]
        cur.executemany(
            "INSERT INTO tag_categories (tag_id, category_id) VALUES (%s,%s)",
            membership_rows)
        con.commit()
    finally:
        con.close()
Пример #22
0
def _question_row(item):
    """Map one API question item to a ``questions`` row (column -> value)."""
    row = {
        'id': item['question_id'],
        'body_html': item['body'],
        'body_markdown': item['body_markdown'],
        'creation_date': from_timestamp(item['creation_date']),
        'last_activity_date': from_timestamp(item['last_activity_date']),
        'link': item['link'],
        'score': item['score'],
        'title': item['title'],
        'historic': True,
        'author_id': item['owner']['user_id'] if item.get('owner') else None,
        'accepted_answer_id': item.get('accepted_answer_id'),
        'closed_date': None,
        'closed_desc': None,
        'closed_reason': None,
    }
    if 'closed_date' in item:
        row['closed_date'] = from_timestamp(item['closed_date'])
        row['closed_desc'] = item['closed_details']['description']
        row['closed_reason'] = item['closed_reason']
    return row


def _answer_row(answer):
    """Map one API answer item to an ``answers`` row (column -> value)."""
    return {
        'id': answer['answer_id'],
        'body_html': answer['body'],
        'body_markdown': answer['body_markdown'],
        'creation_date': from_timestamp(answer['creation_date']),
        'is_accepted': answer['is_accepted'],
        'last_activity_date': from_timestamp(answer['last_activity_date']),
        'link': answer['link'],
        'question_id': answer['question_id'],
        'score': answer['score'],
        'author_id':
        answer['owner']['user_id'] if answer.get('owner') else None,
    }


def _upsert_row(cur, table, row, exists):
    """UPDATE the row with ``row['id']`` in ``table`` if it exists, else INSERT it."""
    columns = sorted(row)
    params = [row[k] for k in columns]
    if exists:
        query = 'UPDATE {} SET {} WHERE id=%s;'.format(
            table, ','.join(k + '=%s' for k in columns))
        params.append(row['id'])
    else:
        query = 'INSERT INTO {} ({}) VALUES ({});'.format(
            table, ','.join(columns), ','.join(['%s'] * len(columns)))
    cur.execute(query, params)


def process_api_questions(items, check_quality=True):
    """Add question/answer list to the database.

    items -- 'items' object returned from Math.SE query against questions with
             specific filters set (as in fetch_recent_questions)
    check_quality -- if True, questions are run through the effort predictor
                     and their quality_score column is updated

    Existing questions and answers are updated in place, new ones inserted;
    each question's tag links are replaced. Returns None.
    """
    if len(items) == 0:
        return
    con = connect_db()
    try:
        cur = con.cursor()
        qids = sorted(set(item['question_id'] for item in items))
        qid_marks = ','.join(['%s'] * len(qids))

        cur.execute(
            "SELECT id FROM questions WHERE id IN (" + qid_marks + ");", qids)
        existing = set(q['id'] for q in cur)

        for item in items:
            question = _question_row(item)
            qid = question['id']
            _upsert_row(cur, 'questions', question, qid in existing)
            # A repeated item in the same batch must update, not re-insert.
            existing.add(qid)

            cur.execute("DELETE FROM question_tags WHERE question_id=%s",
                        [qid])
            tags = list(item['tags'])
            if tags:  # an empty list would otherwise yield invalid "IN ()"
                cur.execute(
                    "SELECT id FROM tags WHERE tags.name IN (" +
                    ','.join(['%s'] * len(tags)) + ");", tags)
                tagids = [t['id'] for t in cur]
                for tagid in tagids:
                    cur.execute(
                        "INSERT INTO question_tags (question_id, tag_id) "
                        "VALUES (%s,%s)", [qid, tagid])

            answers = [_answer_row(a) for a in item.get('answers', [])]
            if answers:
                answer_ids = [a['id'] for a in answers]
                cur.execute(
                    "SELECT id FROM answers WHERE id IN (" +
                    ','.join(['%s'] * len(answer_ids)) +
                    ") ORDER BY id ASC;", answer_ids)
                existinga = set(a['id'] for a in cur)
                for a in answers:
                    _upsert_row(cur, 'answers', a, a['id'] in existinga)
                    existinga.add(a['id'])

        con.commit()

        if check_quality:
            cur.execute(
                "SELECT id, body_html FROM questions WHERE id IN (" +
                qid_marks + ") ORDER BY id ASC;", qids)
            rows = list(cur)
            if rows:
                # NOTE(review): pickle.load runs arbitrary code from the
                # file; psq.pickle must come from a trusted source.
                with open('psq.pickle', 'rb') as f:
                    clf = pickle.load(f)
                bodies = [row['body_html'] for row in rows]
                probs = clf.predict_proba(bodies)[:, 0]
                # Pair each score with the id of the row it was computed from
                # rather than relying on positional alignment with qids.
                for row, prob in zip(rows, probs):
                    cur.execute(
                        "UPDATE questions SET quality_score=%s WHERE id=%s",
                        [int(100 * prob), row['id']])
                con.commit()
    finally:
        con.close()
Пример #23
0
def before_request():
    """Store a database connection on ``g.db``."""
    connection = connect_db()
    g.db = connection
Пример #24
0
 def test_database(self):
     """The table-listing query against the connected database returns a list."""
     # conn = psycopg2.connect(database='flaskr_tdd')
     # TODO: figure out a better string of sql commands
     list_tables_sql = "select relname from pg_class where relkind='r' and relname !~ '^(pg_|sql_)';"
     cursor = connect_db()
     cursor.execute(list_tables_sql)
     rows = cursor.fetchall()
     self.assertIsInstance(rows, list)
Пример #25
0
def reset_db():
    """Connect the app to the database, then drop and recreate every table."""
    connect_db(app)
    # Order matters: drop the existing tables first, then recreate them.
    db.drop_all()
    db.create_all()
Пример #26
0
def init_db():
    """Insert every DUMMY_DATA entry into ``todo.items``, then close the client."""
    with closing(connect_db()) as client:
        # NOTE(review): if this is a PyMongo collection, `insert` is the
        # legacy API (removed in PyMongo 4) -- confirm the driver version.
        for dummy_item in DUMMY_DATA:
            client.todo.items.insert(dummy_item)
Пример #27
0
def build_psq_classifier(end_date_str=None):
    """Build a predictor of whether or not a question will be closed as
    homework / for 'lack of context'.

    The model is a pipeline of a character n-gram TfidfVectorizer and a
    LogisticRegression, tuned by grid search for F1-score and trained on old
    post data: every matching closed question created before the end date
    plus an equal number of un-closed questions.

    If end_date_str isn't specified, it is initialized to two weeks prior.

    Pickles the fitted pipeline to 'psq.pickle'. Some basic data metrics are
    attached to it as ``clf.stats`` and printed.

    Note that we only use posts written after 2013-06-25, the date on which
    the first such closure reason was instituted.
    """

    if end_date_str is None:
        ts = time() - 60 * 60 * 24 * 14
        end_date_str = from_timestamp(ts)

    trf = TfidfVectorizer(
            ngram_range=(2,6),
            stop_words='english',
            analyzer='char',
            preprocessor=preprocess_post
        )

    reg = LogisticRegression()

    clf = Pipeline([('vectorizer', trf), ('reg', reg)])

    X_raw = []
    Y_raw = []

    con = connect_db()
    try:
        cur = con.cursor()

        # Fetch closed questions from database
        query = """SELECT * FROM questions WHERE creation_date < '{}' AND 
               closed_reason='off-topic' AND (closed_desc LIKE '%context%'
               OR closed_desc LIKE '%homework%');""".format(end_date_str)

        cur.execute(query)

        for q in cur:
            X_raw.append(q['body_html'])
            Y_raw.append(1)

        num_closed = len(X_raw)

        # Fetch an equal number of un-closed questions
        query = """SELECT * FROM questions WHERE creation_date < %s AND 
               closed_reason IS NULL ORDER BY creation_date LIMIT %s"""

        cur.execute(query, [end_date_str, num_closed])

        for q in cur:
            X_raw.append(q['body_html'])
            Y_raw.append(0)
    finally:
        # All rows have been read; release the connection before training.
        con.close()

    # Shuffle examples and labels with one shared permutation. `shuff` was
    # previously referenced without being defined, which raised NameError.
    shuff = np.random.permutation(len(X_raw))
    X_raw = [X_raw[i] for i in shuff]
    Y_raw = [Y_raw[i] for i in shuff]

    # Hold back 20% of examples as test set
    X_train, X_test, Y_train, Y_test = train_test_split(
            X_raw, Y_raw, test_size=0.2)

    test_size = len(X_test)
    train_size = len(X_train)

    # Perform grid search to tune parameters for F1-score
    params = [
            {
                'vectorizer__ngram_range': [(2,2), (2,4), (2,6), (2,8)],
                'reg__penalty': ['l1', 'l2'],
                'reg__C': [.01, .03, .1, .3, 1, 3, 10, 30, 100],
                'reg__intercept_scaling': [.1,1,10,100]
            }
        ]

    gridsearch = GridSearchCV(clf, params, scoring='f1', n_jobs=4,
                              pre_dispatch=8)

    gridsearch.fit(X_train, Y_train)
    clf = gridsearch.best_estimator_
    print("Done training classifier!")
    print("Parameters from CV:")
    for k,v in gridsearch.best_params_.items():
        print("{}: {}".format(k,v))
    preds = clf.predict(X_test)
    print("Done making predictions for test set.")
    print("Results:")

    clf.stats = dict()
    clf.stats['train_size'] = train_size
    clf.stats['train_pos'] = np.sum(Y_train)
    clf.stats['train_neg'] = train_size - np.sum(Y_train)
    clf.stats['test_size'] = test_size
    clf.stats['test_pos'] = np.sum(Y_test)
    clf.stats['test_neg'] = test_size - np.sum(Y_test)
    clf.stats['accuracy'] = clf.score(X_test, Y_test)
    clf.stats['precision'] = precision_score(Y_test, preds)
    clf.stats['recall'] = recall_score(Y_test, preds)
    for k in clf.stats:
        print("  {}: {}".format(k, clf.stats[k]))

    with open('psq.pickle', 'wb') as f:
        pickle.dump(clf, f)
Пример #28
0
 def setUp(self):
     """Connect to the "test-temp" database and keep the client on the test case."""
     test_client = connect_db(db="test-temp")
     self.db_client = test_client
Пример #29
0
def create_tag_categories():
    """Cluster MSE tags into categories using sklearn AffinityPropagation.

    Tags are clustered on their pairwise co-occurrence counts across
    questions; a fixed list of meta tags is excluded. Each category is named
    after the tag at its cluster centre.

    Any existing category system in the database will be overwritten.
    """
    con = connect_db()
    try:
        cur = con.cursor()

        query = """
        SELECT T.id, T.name, COUNT(Q.question_id) AS count FROM
        (
            SELECT tags.id, tags.name, COUNT(qt.question_id) AS count FROM tags
            JOIN question_tags AS qt ON qt.tag_id=tags.id
            WHERE tags.name NOT IN ('advice', 'applications', 'big-list',
            'education', 'intuition', 'learning', 'math-history', 'math-software',
            'reference-request', 'self-learning', 'soft-question', 'teaching',
            'alternative-proof-strategy', 'proof-writing', 'visualization',
            'alternative-proof', 'proof-strategy', 'proof-verification',
            'solution-verification', 'definition', 'examples-counterexamples',
            'mathematica', 'wolfram-alpha', 'maple', 'matlab', 'sage', 'octave',
            'floor-function', 'ceiling-function', 'article-writing', 'publishing',
            'combinatorial-species', 'gromov-hyperbolic-spaces', 'chemistry',
            'book-recommendation')
            GROUP BY tags.name
        ) AS T
        JOIN question_tags AS Q ON T.id=Q.tag_id
        GROUP BY T.id"""
        cur.execute(query)

        # tag_indices maps a tag id to its row/column in the similarity matrix.
        tag_ids = []
        tag_names = []
        tag_indices = dict()
        for q in cur:
            tag_indices[q['id']] = len(tag_ids)
            tag_ids.append(q['id'])
            tag_names.append(q['name'])

        tag_ids = np.array(tag_ids)
        tag_names = np.array(tag_names)

        query = """
        SELECT t1.id AS tag1, t2.id AS tag2, COUNT(qt1.question_id) as count
        FROM question_tags AS qt1
        JOIN question_tags AS qt2 ON qt1.question_id=qt2.question_id
        JOIN tags AS t1 ON t1.id=qt1.tag_id
        JOIN tags AS t2 ON t2.id=qt2.tag_id
        WHERE t1.id IN ({taglist}) AND t2.id IN ({taglist})
        GROUP BY t1.name, t2.name""".format(
            taglist=','.join(str(i) for i in tag_ids))
        cur.execute(query)

        # np.float64 replaces the alias np.float_, which NumPy 2.0 removed.
        sim = np.zeros((len(tag_ids), len(tag_ids)), dtype=np.float64)
        for q in cur:
            i1 = tag_indices[q['tag1']]
            i2 = tag_indices[q['tag2']]
            c = q['count']
            if i1 == i2:
                sim[i1, i1] = int(c / 2)
            else:
                sim[i1, i2] = c

        cluster = AffinityPropagation(affinity='precomputed', damping=0.5)
        labels = cluster.fit_predict(sim)
        classes = sorted(set(labels))

        cur.execute("DELETE FROM categories WHERE 1;")
        cur.execute("DELETE FROM tag_categories WHERE 1;")

        # Values are converted to plain int/str and sent as parameters rather
        # than being formatted into the SQL text.
        category_rows = [
            (int(c), str(tag_names[cluster.cluster_centers_indices_[c]]))
            for c in classes
        ]
        cur.executemany(
            "INSERT INTO categories (id,name) VALUES (%s,%s)", category_rows)

        membership_rows = [
            (int(tag_id), int(label)) for tag_id, label in zip(tag_ids, labels)
        ]
        cur.executemany(
            "INSERT INTO tag_categories (tag_id, category_id) VALUES (%s,%s)",
            membership_rows)
        con.commit()
    finally:
        con.close()
Пример #30
0
from unittest import TestCase
from app import db, connect_db, app
from models import User, Favorite, Song, Playlist
from info.api_samples import SONG_JSON

# Test configuration: dedicated test database, no SQL echo, CSRF checks off.
app.config.update({
    'SQLALCHEMY_DATABASE_URI': 'postgresql:///phonia_music_test',
    'SQLALCHEMY_ECHO': False,
    'WTF_CSRF_ENABLED': False,
})

# Bind the database to the configured app, then rebuild every table.
connect_db(app)
db.drop_all()
db.create_all()

class UserViewsTestCase(TestCase):

    def setUp(self):
        User.query.delete()
        Favorite.query.delete()
        Song.query.delete()
        db.session.commit()

        self.username = "******"
        self.user_password = "******"
        self.user_full_name = "TEST MAN"
        self.user = User.signup(username=self.username, password=self.user_password, full_name=self.user_full_name)

        db.session.add(self.user)
        db.session.commit()

        self.user_id = self.user.id