def train(self, emails, w2v, epochs=10, save_model=True):
    """Train the model to predict an email's embedding from its sender/receiver.

    Every (sender, receiver) pair of every email is one training step: the
    output of ``self.forward`` for the two user ids is regressed (MSE loss,
    RMSprop updates) onto that email's representation as returned by
    ``w2v.get_email_reps(emails, average=True)``.

    Args:
        emails: Indexable as ``emails[i, column]``; the
            ``constants.SENDER_EMAIL`` and ``constants.RECEIVER_EMAILS``
            columns are read, the latter as a ``'|'``-separated string.
        w2v: Object providing ``get_email_reps(emails, average=True)``.
        epochs: Number of passes over ``emails``.
        save_model: If true, the model is saved as
            ``constants.RUN_ID + '_model.pth'``.

    Side effects:
        Increments ``self.emailid_train_freq`` for each address used in a
        training step, prints timing and loss per epoch, and finally
        extracts, saves and plots the user embeddings.
    """
    # NOTE(review): the original indentation was lost; saving/plotting is
    # assumed to happen once after the last epoch -- confirm.
    loss_criteria = nn.MSELoss()
    optimizer = optim.RMSprop(self.parameters(), lr=0.0001, alpha=0.99,
                              momentum=0.0)
    # optimizer = optim.Adam(self.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    email_reps = w2v.get_email_reps(emails, average=True)
    train_freq = self.emailid_train_freq

    for epoch in range(epochs):
        # String concatenation keeps the output identical under both the
        # Python 2 print statement and the Python 3 print function.
        print('running epoch  ' + str(epoch))
        start = time.time()
        epoch_loss = 0.0
        for i in range(len(emails)):
            sender_email = emails[i, constants.SENDER_EMAIL]
            sender_id = utils.get_userid(sender_email)
            # average email embedding built from the word embeddings of the mail
            email_rep = email_reps[i]
            # if no word_rep was found for any of the words, ignore this email
            if email_rep is None:
                continue
            # a sender that is not a known user invalidates every pair of
            # this email, so skip it before looking at the receivers
            if sender_id is None:
                continue
            # the regression target is the same for every receiver
            target = autograd.Variable(torch.from_numpy(email_rep))
            sender_var = autograd.Variable(torch.LongTensor([sender_id]))

            for recv in emails[i, constants.RECEIVER_EMAILS].split('|'):
                recv_id = utils.get_userid(recv)
                # receivers that are not known users are ignored
                if recv_id is None:
                    continue
                optimizer.zero_grad()
                # a valid pair was found: update both frequency counters
                train_freq[sender_email] = train_freq.get(sender_email, 0) + 1
                train_freq[recv] = train_freq.get(recv, 0) + 1
                # forward pass
                pred_email_rep = self.forward(
                    sender_var,
                    autograd.Variable(torch.LongTensor([recv_id])))
                # loss, gradient and weight update
                loss = loss_criteria(pred_email_rep, target)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.data.numpy()
        end = time.time()
        print('time taken  ' + str(end - start))
        print('loss in epoch ' + str(epoch) + ' = ' + str(epoch_loss))

    if save_model:
        file_name = constants.RUN_ID + '_model.pth'
        self.save(file_name)
    email_ids, embs = self.extract_user_embeddings()
    utils.save_user_embeddings(email_ids, embs)
    # utils.get_similar_users(email_ids, embs)
    plots.plot_with_tsne(email_ids, embs, display_hover=False)
def main(args):
    """Clear or update the curators of the pulsar named ``args.psrname``.

    With ``args.remove_all`` set, every curator is cleared. Otherwise the
    user names in ``args.to_add`` / ``args.to_remove`` are translated to
    user ids and handed to ``update_curators``; ``args.add_wild`` and
    ``args.remove_wild`` append ``None`` to the respective id list.
    """
    pulsar_id = utils.get_pulsarid(args.psrname)
    if args.remove_all:
        clear_curators(pulsar_id)
        return

    add_ids = []
    for name in args.to_add:
        add_ids.append(utils.get_userid(name))
    if args.add_wild:
        add_ids.append(None)

    remove_ids = []
    for name in args.to_remove:
        remove_ids.append(utils.get_userid(name))
    if args.remove_wild:
        remove_ids.append(None)

    update_curators(pulsar_id, add_ids, remove_ids)
def create_post_view():
    """Show the post editor, or create a post from the query string.

    Returns the login page when no user id is available, the edit page when
    the request carries no query arguments, and otherwise creates a post
    from the query arguments and redirects to ``/posts``.
    """
    uid = get_userid()
    if not uid:
        print("USER ID WAS BAD")
        return render_template("login.html")

    # token_conn = TokenTable()
    # user_id = token_conn.get_uuid(cookie)
    account = Account.init_from_uuid(uid)

    query = request.args
    print(query)
    print(request.form)
    if not query:
        return render_template("edit_post.html")

    # Copy the query arguments and add the keyword names create_post is
    # called with; the source keys are left in place as in the original.
    fields = dict(query)
    fields.update(
        is_request=True,
        length=fields.get('duration'),
        date=fields.get('start_date'),
        skill_set=fields.get('skillset', []),
    )
    created = account.create_post(**fields)
    # print("POSTS CREATE NEW")
    created.to_dict()
    # print(post)
    return redirect("/posts")
def create_new_post():
    """Create a post from the JSON request body and return it as JSON.

    Returns:
        ``{"status": "failure"}`` (JSON string) when no user id is available
        or the request body is not a JSON object; otherwise the created
        post's dict with ``status`` set to ``"success"``.
    """
    failure = json.dumps({"status": "failure"})

    user_id = get_userid()
    if not user_id:
        print("USER ID WAS BAD")
        return failure

    # token_conn = TokenTable()
    # user_id = token_conn.get_uuid(cookie)
    acc = Account.init_from_uuid(user_id)

    payload = request.json
    # A missing or non-object body used to crash in dict(); report it the
    # same way as any other failed request instead.
    if not isinstance(payload, dict):
        return failure

    res = dict(payload)
    print(res)
    res['is_request'] = True
    res['length'] = res.get('duration')
    res['date'] = res.get('start_date')
    res['skill_set'] = res.get('skillset', [])

    # tags normally arrive as one comma-separated string; tolerate an
    # explicit null or an already-split list rather than failing on .split.
    tags = res.get('tags', "")
    if tags is None:
        tags = ""
    if isinstance(tags, (list, tuple)):
        res['tags'] = list(tags)
    else:
        res['tags'] = tags.split(',')

    post = acc.create_post(**res)
    print("POSTS CREATE NEW!")
    post = post.to_dict()
    print(post)
    post['status'] = 'success'
    return json.dumps(post)
def add_member():
    """Send a group membership request for a user named in the form data.

    The user is identified by ``user_id`` or, failing that, by
    ``user_username``; the group by ``group_id``. Returns a ``Validity``
    response describing success or the reason for refusal.
    """
    form = {field: request.form[field].strip() for field in request.form}

    if 'user_id' not in form:
        assert 'user_username' in form
        username = form['user_username']
        # As written, a truthy validate_username result is reported as
        # "no such user".
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['user_id'] = utils.get_userid(username)

    user_id = int(form['user_id'])
    if not utils.validate_userid(user_id):
        return Validity(False, 'Invalid user.').get_resp()

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id=group_id):
        return Validity(False, 'Invalid group id').get_resp()

    if utils.validate_groupreqs(user_id, group_id):
        return Validity(False, 'Invitation already sent.').get_resp()

    if utils.validate_membership(user_id, group_id):
        return Validity(False, 'Already in the group').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    group.add_memberReq(user_id)
    db.session.commit()
    return Validity(True).get_resp()
def signup():
    """Render the signup page, pre-filled for an already logged-in visitor.

    The token cookie (``TOKEN_NAME``) is looked up in the token table. Only
    when it maps to a user id and validates is the account loaded and its
    fields passed to the template together with ``logged_in=True``; in
    every other case the plain signup page is rendered.
    """
    cookie = request.cookies.get(TOKEN_NAME)
    # no cookie
    if not cookie:
        return render_template("signup.html")

    token_conn = TokenTable()
    user_id = token_conn.get_uuid(cookie)
    if not (user_id and token_conn.validate(user_id, cookie)):
        return render_template("signup.html")

    # Load the account only once the token is known to be valid, and do not
    # crash on .to_dict() if no account is found for that id.
    account = Account.init_from_uuid(user_id)
    if not account:
        return render_template("signup.html")

    return render_template("signup.html", token_uuid=get_userid(),
                           logged_in=True, **account.to_dict())
def authenticate(self, fb_code):
    """Return the user whose ``FBProfile`` matches ``fb_code``, else ``None``.

    ``None`` is returned when resolving the code or the profile lookup
    raises ``ValueError`` or ``ObjectDoesNotExist``.
    """
    try:
        return FBProfile.objects.get(fb_userid=get_userid(fb_code)).user
    except (ValueError, ObjectDoesNotExist):
        return None
def predict(self, email, w2v, label=None, training_mode=False):
    """Run the model on one email and return a loss or output probabilities.

    Args:
        email: Indexable record; ``constants.SENDER_EMAIL`` and
            ``constants.EMAIL_BODY`` are read, and index ``1`` is read as a
            ``'|'``-separated receiver string.
        w2v: Object providing ``get_sentence(text)``, whose result is
            averaged over axis 0 to form the email representation.
        label: Target passed to the cross-entropy loss in training mode.
        training_mode: If true return the loss, otherwise softmax outputs.

    Returns:
        ``(result, True)`` on success, or ``(0, False)`` when the email is
        skipped (unknown sender, no body, no word vectors, or no known
        receiver).

    Side effects:
        Increments ``self.emailid_train_freq`` for every known receiver and,
        if at least one was found, for the sender.
    """
    sender_email = email[constants.SENDER_EMAIL]
    sender_id = utils.get_userid(sender_email)
    email_content = email[constants.EMAIL_BODY]
    # skip if the sender does not have an embedding or there is no body
    if sender_id is None or email_content is None:
        return 0, False

    # Average email embedding from the word embeddings of the body. The body
    # checked above is used here instead of a hard-coded column index.
    email_rep = np.array(w2v.get_sentence(email_content))
    if not email_rep.shape[0]:
        return 0, False
    email_rep = np.mean(email_rep, axis=0).reshape(1, -1)

    # NOTE(review): index 1 is presumably constants.RECEIVER_EMAILS -- confirm.
    recv_ids = []
    for recv in email[1].split('|'):
        recv_id = utils.get_userid(recv)
        if recv_id is not None:
            recv_ids.append(recv_id)
            self.emailid_train_freq[recv] = self.emailid_train_freq.get(
                recv, 0) + 1

    # if none of the receivers were found, ignore this case
    if not recv_ids:
        return 0, False

    # the sender was found and is being used: update its frequency count
    self.emailid_train_freq[sender_email] = self.emailid_train_freq.get(
        sender_email, 0) + 1

    # forward pass
    pred_out = self.forward(sender_id, recv_ids, email_rep)

    if training_mode:
        # the criterion is only needed (and only built) when training
        return nn.CrossEntropyLoss()(pred_out, label), True
    return nn.Softmax()(pred_out), True
def add_to_posts():
    """Add the current user as a volunteer to the post named in the request.

    Reads ``post_id`` from the JSON request body, loads the post, adds the
    id returned by ``get_userid()`` as a volunteer, persists the post and
    renders the posts page.
    """
    # request.json is a mapping, so the id must be read by key; attribute
    # access (request.json.post_id) raised AttributeError.
    post_id = request.json["post_id"]
    post = Post.init_from_uid(post_id)
    uuid = get_userid()
    post.add_volunteer(uuid)
    post.update_in_db()
    return render_template("posts.html")
def predict(self, email, w2v):
    """Return the MSE loss of the model's word predictions for one email.

    Returns ``(loss, True)`` on success and ``(0, False)`` when the email
    is skipped: unknown sender, missing body, no known receiver, or no
    (previous/next -> current) word combinations.

    Side effects:
        Increments ``self.emailid_train_freq`` for every known receiver and,
        if at least one was found, for the sender.
    """
    skipped = (0, False)
    freq = self.emailid_train_freq

    sender_address = email[constants.SENDER_EMAIL]
    sender_id = utils.get_userid(sender_address)
    body = email[constants.EMAIL_BODY]
    # no sender embedding or no text: nothing to predict
    if sender_id is None or body is None:
        return skipped

    known_receivers = []
    for address in email[1].split('|'):
        receiver_id = utils.get_userid(address)
        if receiver_id is None:
            continue
        known_receivers.append(receiver_id)
        freq[address] = freq.get(address, 0) + 1

    # none of the receivers is a known user
    if len(known_receivers) == 0:
        return skipped

    # the sender takes part in this example, so count it
    freq[sender_address] = freq.get(sender_address, 0) + 1

    # word vectors of the body, then every (w_j-1, w_j+1) -> w_j combination
    word_reps = w2v.get_sentence(body)
    context_embs, target_embs = self.generate_all_combinations(word_reps)
    if len(target_embs) == 0:
        return skipped

    # forward pass and loss
    predicted = self.forward(sender_id, known_receivers, context_embs)
    expected = autograd.Variable(torch.from_numpy(target_embs))
    criterion = nn.MSELoss()
    return criterion(predicted, expected), True
def grab_post():
    """Register the current user as a volunteer on the requested post.

    Returns ``FAIL_MSG`` when there is no user id, no matching account, or
    ``add_volunteer`` reports failure; otherwise persists the post and
    returns a JSON success status.
    """
    uid = get_userid()
    if not uid or not Account.init_from_uuid(uid):
        return FAIL_MSG

    target = Post.init_from_uid(request.json["post_id"])
    if target.add_volunteer(uid):
        print("!!!!!!!!!!!!!!!!!!!!!!!!!")
        target.update_in_db()
        return json.dumps({"status": "success"})
    return FAIL_MSG
def deny_friendReqs():
    """Deny a pending friend request addressed to the current user.

    The requester is identified by ``friend_id`` or, failing that, by
    ``friend_username`` in the form data. Returns a ``Validity`` response.
    """
    form = {field: request.form[field].strip() for field in request.form}

    if 'friend_id' not in form:
        assert 'friend_username' in form
        username = form['friend_username']
        # As written, a truthy validate_username result is reported as
        # "no such user".
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['friend_id'] = utils.get_userid(username)

    friend_id = int(form['friend_id'])
    request_exists = utils.validate_friendreqs(friend_id,
                                               int(current_user.id))
    if not request_exists:
        return Validity(False, 'Request does not exist.').get_resp()

    requester = User.query.filter_by(id=friend_id).first()
    requester.deny_friendReq(int(current_user.id))
    db.session.commit()
    return Validity(True).get_resp()
def extract_user_embeddings(self, threshold=1):
    """Collect the embeddings of users seen at least ``threshold`` times.

    Every address from ``utils.get_user_emails()`` whose count in
    ``self.emailid_train_freq`` is not below ``threshold`` is looked up in
    ``self.embedding_layer``.

    :param threshold: minimum training frequency for an address to be kept
    :return: ``(email_ids, embeddings)`` -- the kept addresses and a numpy
        array holding one flattened embedding per address, in the same order
    """
    kept_ids = [
        address for address in utils.get_user_emails()
        if not self.emailid_train_freq.get(address, 0) < threshold
    ]

    vectors = []
    for address in kept_ids:
        index = autograd.Variable(
            torch.LongTensor([utils.get_userid(address)]))
        vectors.append(self.embedding_layer(index).data.numpy().reshape(-1))

    return kept_ids, np.array(vectors)
def touch_timfile(timfile_id, existdb=None):
    """Stamp a timfile with the current user and the current time.

        Sets the 'user_id' and 'add_time' columns of the matching row.

        Inputs:
            timfile_id: The ID of the timfile to touch.
            existdb: An (optional) existing database connection object.
                (Default: Establish a db connection)

        Outputs:
            None
    """
    db = existdb or database.Database()
    db.connect()
    try:
        values = {'user_id': utils.get_userid(),
                  'add_time': datetime.datetime.now()}
        update = db.timfiles.update().\
                    where(db.timfiles.c.timfile_id == timfile_id)
        results = db.execute(update, values)
        results.close()
    finally:
        # Only close a connection opened here, and do so even when the
        # update fails so the connection is not leaked.
        if not existdb:
            db.close()
def delete_member():
    """Remove a member (who is not the owner) from a group.

    The user is identified by ``user_id`` or, failing that, by
    ``user_username``; the group by ``group_id``. Returns a ``Validity``
    response describing success or the reason for refusal.
    """
    form = {field: request.form[field].strip() for field in request.form}

    if 'user_id' not in form:
        assert 'user_username' in form
        username = form['user_username']
        # As written, a truthy validate_username result is reported as
        # "no such user".
        if utils.validate_username(username):
            return Validity(
                False, 'User ' + username + ' does not exist.').get_resp()
        form['user_id'] = utils.get_userid(username)

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id=group_id):
        return Validity(False, 'Invalid group id').get_resp()

    user_id = int(form['user_id'])
    # only a plain member may leave; the owner may not
    removable = (utils.validate_membership(user_id, group_id)
                 and not utils.validate_ownership(user_id, group_id))
    if not removable:
        return Validity(False, 'Can not quit the group').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    group.delete_member(user_id)
    db.session.commit()
    return Validity(True).get_resp()
def add_friend():
    """Send a friend request from the current user to another user.

    The target is identified by ``friend_id`` or, failing that, by
    ``friend_username`` in the form data. Returns a ``Validity`` response:
    failure if the two are already friends or a request was already sent,
    success once the request has been stored.
    """
    form = {k: request.form[k].strip() for k in request.form}

    if 'friend_id' not in form:
        assert 'friend_username' in form
        # As written, a truthy validate_username result is reported as
        # "no such user".
        if utils.validate_username(form['friend_username']):
            return Validity(
                False, 'User ' + form['friend_username'] +
                ' does not exist.').get_resp()
        form['friend_id'] = utils.get_userid(form['friend_username'])

    friend_id = int(form['friend_id'])

    if utils.validate_friendship(int(current_user.id), friend_id):
        # The caller may have supplied only friend_id; fall back to the id
        # instead of raising KeyError on the missing username.
        friend_label = form.get('friend_username', str(form['friend_id']))
        return Validity(
            False, 'User ' + friend_label +
            ' has already been your friend.').get_resp()

    if utils.validate_friendreqs(int(current_user.id), friend_id):
        return Validity(False, 'Request already sent.').get_resp()

    # friend = User.query.filter_by(id = int(form['friend_id'])).first()
    current_user.add_friendReq(friend_id)
    # friend.add_friend(int(current_user.id))
    db.session.commit()
    return Validity(True).get_resp()
def update_group():
    """Update the name, owner and/or info of a group owned by the current user.

    ``group_id`` is required; ``name``, ``info`` and the new owner
    (``owner_id`` or ``owner_username``) are optional, and fields that are
    absent are passed to ``group.update`` as ``None``. Returns a
    ``Validity`` response carrying the group's info map on success.
    """
    form = {field: request.form[field].strip() for field in request.form}

    if 'owner_id' not in form and 'owner_username' in form:
        #assert 'owner_username' in form
        owner_name = form['owner_username']
        # As written, a truthy validate_username result is reported as
        # "no such user".
        if utils.validate_username(owner_name):
            return Validity(
                False, 'User ' + owner_name + ' does not exist.').get_resp()
        form['owner_id'] = utils.get_userid(owner_name)

    group_id = int(form['group_id'])
    if not utils.validate_groupid(group_id):
        return Validity(False, 'Invalid group id').get_resp()

    if not utils.validate_ownership(int(current_user.id), group_id):
        return Validity(False, 'No access').get_resp()

    group = Group.query.filter_by(id=group_id).first()
    new_owner = int(form['owner_id']) if 'owner_id' in form else None
    group.update(name=form.get('name'),
                 owner_id=new_owner,
                 info=form.get('info'))
    db.session.commit()
    return Validity(True, group.get_info_map()).get_resp()
def replace_rawfile(obsolete_id, replace_id, comments, existdb=None):
    """In the database, mark an obsolete data file as being replaced.

        Inputs:
            obsolete_id: The rawfile_id of the data file being replaced.
            replace_id: The rawfile_id of the replacement data file.
            comments: A comment describing the replacement.
            existdb: An (optional) existing database connection object.
                (Default: Establish a db connection)

        Outputs:
            None

        Raises:
            errors.InconsistentDatabaseError: more than one row matches
                obsolete_id.
            errors.BadInputError: obsolete_id does not exist.
            errors.RawfileSuperseded: obsolete_id has already been replaced.
    """
    # Connect to the database
    db = existdb or database.Database()
    db.connect()
    try:
        replaced = db.replacement_rawfiles

        # Check if obsolete_id exists in rawfiles. If not, fail.
        select = db.select([db.rawfiles.c.rawfile_id,
                            replaced.c.replacement_rawfile_id.
                                label("existing_replace_id")],
                    from_obj=[db.rawfiles.
                        outerjoin(replaced,
                            onclause=replaced.c.obsolete_rawfile_id ==
                                     db.rawfiles.c.rawfile_id)]).\
                    where(db.rawfiles.c.rawfile_id == obsolete_id)
        result = db.execute(select)
        rows = result.fetchall()
        result.close()
        if len(rows) > 1:
            raise errors.InconsistentDatabaseError("There are multiple (%d) " \
                        "rawfiles with ID=%d. Each ID should be unique!" % \
                        (len(rows), obsolete_id))
        elif len(rows) != 1:
            raise errors.BadInputError("The obsolete rawfile being replaced " \
                        "(ID:%d) does not exist!" % obsolete_id)
        row = rows[0]  # There is only one row

        # Check if obsolete_id is already replaced. If so, list replacement
        # and fail.
        if row['existing_replace_id'] is not None:
            raise errors.RawfileSuperseded("The rawfile (ID=%d) has already been " \
                        "replaced by ID=%d. Perhaps it is the " \
                        "latter file that should be replaced, or " \
                        "perhaps no additional replacement is " \
                        "required." % \
                        (obsolete_id, row['existing_replace_id']))

        # Log the replacement
        user_id = utils.get_userid()
        ins = replaced.insert()
        values = {'obsolete_rawfile_id': obsolete_id,
                  'replacement_rawfile_id': replace_id,
                  'user_id': user_id,
                  'comments': comments}
        result = db.execute(ins, values)
        result.close()

        # Check if obsolete_id is itself a replacement for other files.
        # If so, point each of them at the newest replacement and append a
        # tagged comment.
        select = db.select([replaced.c.obsolete_rawfile_id,
                            replaced.c.comments]).\
                    where(replaced.c.replacement_rawfile_id == obsolete_id)
        result = db.execute(select)
        rows = result.fetchall()
        result.close()

        user = utils.get_userinfo()
        # The appended note is identical for every affected row.
        note = "\n%s (%d -> %d at %s): %s" % \
                    (user, obsolete_id, replace_id, utils.Give_UTC_now(),
                     comments)
        for row in rows:
            values = {'replacement_rawfile_id': replace_id,
                      'comments': row['comments'] + note}
            # update() belongs to the table, not to its column collection.
            # The WHERE clause is restricted to this row so each entry keeps
            # its own comment history instead of receiving the first row's.
            update = replaced.update().where(
                        (replaced.c.replacement_rawfile_id == obsolete_id) &
                        (replaced.c.obsolete_rawfile_id ==
                            row['obsolete_rawfile_id']))
            results = db.execute(update, values)
            results.close()
    finally:
        # Only close a connection that was opened here.
        if not existdb:
            db.close()