def extract_java_relevant_ids_from_postlink(java_id_set):
    """Collect the ids of posts that are linked to each other within ``java_id_set``.

    Every row of the ``post_links`` table is read; when both post ids of a
    row are members of ``java_id_set``, both ids are kept.

    :param java_id_set: collection of post ids; only ``in`` tests are run on
        it, so passing a ``set`` keeps the scan fast.
    :return: sorted list of the distinct ids that were kept. When the query
        or the scan raises, the error is printed and the ids gathered so far
        are returned.
    """
    sql = "SELECT * FROM post_links"
    con = mdb.connect('localhost', 'root', 'root', '05-Sep-2018-SO')
    cur = con.cursor()
    relevant_ids = set()
    cnt = 0
    try:
        cur.execute(sql)
        for row in cur.fetchall():
            # Columns 2 and 3 hold the two linked post ids (presumably PostId
            # and RelatedPostId of the Stack Overflow dump -- confirm against
            # the table schema, since the query uses SELECT *).
            post_id = row[2]
            related_post_id = row[3]
            if post_id in java_id_set and related_post_id in java_id_set:
                relevant_ids.add(post_id)
                relevant_ids.add(related_post_id)
            cnt += 1
            if cnt % 10000 == 0:
                # A single formatted argument prints the same text under
                # Python 2 and Python 3.
                print('Processing %s... %s' % (cnt, get_current_time()))
    except Exception as e:
        # Best effort: report the failure and return what was collected.
        print(e)
    finally:
        # Release the cursor and connection even if the scan is interrupted.
        cur.close()
        con.close()
    print("# relevant qid = %s %s" % (len(relevant_ids), get_current_time()))
    return sorted(relevant_ids)
def preprocessing(qlist):
    """Preprocess every question of ``qlist`` in place.

    Each element is replaced by the result of ``preprocessing_for_que`` and a
    progress line is printed for every 1000th index (including index 0).

    :param qlist: list of questions; it is modified in place.
    :return: the same ``qlist`` object, now holding the preprocessed items.
    """
    # A single formatted argument prints the same text under Python 2 and 3.
    print("preprocessing... %s" % (get_current_time(),))
    for i, que in enumerate(qlist):
        qlist[i] = preprocessing_for_que(que)
        if i % 1000 == 0:
            print("preprocessing %s question... %s" % (i, get_current_time()))
    return qlist
def build_IDF_vocabulary():
    """Build an IDF table from the titles of all questions in the repository.

    The document frequency of a word is the number of questions whose
    tokenized title contains it at least once. Question bodies are not
    counted. The score of a word is ``log(N / (df + 1.0))`` where ``N`` is
    the number of questions.

    :return: list of ``(word, idf)`` tuples sorted by ascending idf.
    """
    qlist = read_all_questions_from_repo()
    total_num = len(qlist)
    doc_freq = {}
    count = 0
    for q in qlist:
        # Count each word once per title, in order of first appearance.
        seen = set()
        for w in word_tokenize(q.title.strip()):
            if w not in seen:
                seen.add(w)
                # dict.get is a hash lookup; the former ``w not in voc.keys()``
                # scanned a list on every word under Python 2.
                doc_freq[w] = doc_freq.get(w, 0.0) + 1.0
        count += 1
        if count % 10000 == 0:
            print('processing %s unit... %s' % (count, get_current_time()))
    voc = {}
    for word, freq in doc_freq.items():
        voc[word] = math.log(total_num / (freq + 1.0))
    sorted_voc = sorted(voc.items(), key=operator.itemgetter(1))
    return sorted_voc
def load_java_qid_set(csv_fpath):
    """Load the question ids stored in the first column of a header-less CSV.

    :param csv_fpath: path of the CSV file; it is read without a header row.
    :return: set of the distinct values found in the first column.
    """
    df = pd.read_csv(csv_fpath, header=None)
    # Read the column directly: iterrows() builds one Series per row and
    # upcasts it to a single dtype, so integer ids could come back as floats
    # when another column holds floats.
    id_set = set(df[0].tolist())
    # A single formatted argument prints the same text under Python 2 and 3.
    print('#java questions = %s %s' % (len(id_set), get_current_time()))
    return id_set
def save_timeseries(self, analyse_type, description, empty_data, end_time, file_type, name, series_data,
                    start_time, source_file, window=None):
    """
    Create a time-series record in the database.

    All values are bound as query parameters, so quotes or backslashes in
    text (for example in ``source_file`` paths) can no longer break or alter
    the statement.

    NOTE(review): ``end_time``, ``series_data`` and ``start_time`` used to be
    spliced into the SQL unquoted. They are now bound as plain values, so
    callers must pass Python values rather than pre-quoted SQL fragments --
    confirm the call sites.

    :param analyse_type: stored as text in the ``analyse_type`` column
    :param description: stored as text in the ``description`` column
    :param empty_data: stored as text in the ``empty_data`` column
    :param end_time: value for the ``end_time`` column
    :param file_type: stored as text in the ``file_type`` column
    :param name: stored as text in the ``name`` column
    :param series_data: value for the ``series_data`` column
    :param start_time: value for the ``start_time`` column
    :param source_file: File path, stored as text
    :param window: parent widget for the warning dialog; ``None`` (the
        default) shows the dialog without a parent. The default is no longer
        a ``QMessageBox`` built when the method is defined.
    :return: id of the inserted row, or ``False`` when the insert failed
    """
    # Assumes a MySQL DB-API driver with the "%s" paramstyle (PyMySQL/MySQLdb).
    timeseries_query = (
        "INSERT INTO fpat_db.fpat_timeseries (created_at, analyse_type, description,"
        " empty_data, end_time, file_type, name, series_data, start_time, source_file)"
        " VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
    )
    try:
        # str() keeps the stored text identical to what the old quoted
        # literals rendered (e.g. True -> 'True').
        params = (
            get_current_time(),
            str(analyse_type),
            str(description),
            str(empty_data),
            end_time,
            str(file_type),
            str(name),
            series_data,
            start_time,
            str(source_file),
        )
        with self.connection.cursor() as cursor:
            cursor.execute(timeseries_query, params)
            timeseries_id = cursor.lastrowid
        self.connection.commit()
        return timeseries_id
    except Exception as ex:
        print(ex)
        QMessageBox.about(window, "Warning", "Database Error !!!")
        return False
def save_product_record(self, name, code, categorya, categoryb, info1="NULL", info2='NULL', info3='NULL',
                        info4='NULL', info5='NULL', info6='NULL', info7='NULL'):
    """
    Create a record in the product table.

    All values are bound as query parameters, so quotes or backslashes in a
    name or info field can no longer break or alter the statement.

    :param name: stored as text in the ``name`` column
    :param code: stored as text in the ``code`` column
    :param categorya: stored as text in the ``categorya`` column
    :param categoryb: stored as text in the ``categoryb`` column
    :param info1: free text; the default ``"NULL"`` is stored as the literal
        text ``NULL`` (as before), not as SQL NULL
    :param info2: same as ``info1``
    :param info3: same as ``info1``
    :param info4: same as ``info1``
    :param info5: same as ``info1``
    :param info6: same as ``info1``
    :param info7: same as ``info1``
    :return: ``True`` when the row was inserted and committed, ``False`` when
        any step raised
    """
    # Assumes a MySQL DB-API driver with the "%s" paramstyle (PyMySQL/MySQLdb).
    query = (
        "insert into fpat_product(created_at, name,code, categorya, categoryb, info1,info2, info3, "
        "info4, info5, info6, info7) values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    try:
        text_fields = (name, code, categorya, categoryb,
                       info1, info2, info3, info4, info5, info6, info7)
        # str() keeps the stored text identical to what the old quoted
        # literals rendered.
        params = (get_current_time(),) + tuple(str(value) for value in text_fields)
        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
        self.connection.commit()
        return True
    except Exception as ex:
        # Report the cause instead of failing silently.
        print(ex)
        return False
def insert_qlist_to_table(qlist, tablename):
    """Insert every question of ``qlist`` into ``tablename``, one commit per row.

    Each row is ``(id, title, body, tag)`` where ``tag`` is the question's
    tag list joined with commas. A row that fails to insert is printed and
    skipped; the remaining rows are still processed.

    :param qlist: iterable of questions exposing ``id``, ``title``, ``body``
        and ``tag`` (an iterable of strings).
    :param tablename: name of the target table. It is interpolated into the
        SQL text (identifiers cannot be bound), so it must come from trusted
        code.
    :return: ``None``
    """
    print("start to insert... %s" % (get_current_time(),))
    # Only the table name is formatted in; the doubled %% leaves four DB-API
    # placeholders so the driver escapes the values itself.
    sql = "INSERT INTO %s VALUES(%%s, %%s, %%s, %%s)" % tablename
    con = mdb.connect('localhost', 'root', '123456', 'answerbot')
    cur = con.cursor()
    try:
        count = 1
        for q in qlist:
            try:
                # id,title,body,tag
                cur.execute(sql, (q.id, q.title, q.body, ','.join(q.tag)))
                con.commit()
                count += 1
                if count % 1000 == 0:
                    print('Inserting %s question to Table %s %s'
                          % (count, tablename, get_current_time()))
            except Exception as e:
                print("id %s\ntitle %s\nbody %s\ntag %s\n" % (q.id, q.title, q.body, q.tag))
                print(e)
    finally:
        # Release the cursor and connection even if the loop is interrupted.
        cur.close()
        con.close()
    print('Insert finished. %s' % (get_current_time(),))
    return
def create_evaluation(self, product_id, attachment_one, attachment_two, window=None):
    """
    Create a new Evaluation of a Product.

    This method performs two steps in one transaction:
        1. Insert the evaluation row
        2. Store the new evaluation id on the product and mark it evaluated

    Both statements are committed together; if either fails the transaction
    is rolled back, so no evaluation row is left without its product link.
    Evaluation answers are not created here.

    :param product_id: id of the product row to update
    :param attachment_one: stored as text in ``attachment_one``
    :param attachment_two: stored as text in ``attachment_two``
    :param window: parent widget for the warning dialog; ``None`` (the
        default) shows the dialog without a parent. The default is no longer
        a ``QMessageBox`` built when the method is defined.
    :return: evaluation id, or ``False`` when a database step failed
    """
    # Assumes a MySQL DB-API driver with the "%s" paramstyle (PyMySQL/MySQLdb).
    evalution_query = (
        "INSERT INTO fpat_evalutions (created_at, attachment_one,"
        " attachment_two) VALUES(%s, %s, %s);"
    )
    product_query = "UPDATE fpat_product SET evalutions_ID=%s, evaluated=%s WHERE id=%s;"
    try:
        with self.connection.cursor() as cursor:
            # Create Evaluation; str() keeps the stored text identical to
            # what the old quoted literals rendered.
            cursor.execute(
                evalution_query,
                (get_current_time(), str(attachment_one), str(attachment_two)),
            )
            evalution_id = cursor.lastrowid
            # Update the Evaluation_Id in Product
            cursor.execute(product_query, (evalution_id, 1, product_id))
        self.connection.commit()
        return evalution_id
    except Exception as ex:
        print(ex)
        try:
            # Undo the evaluation insert when the product update failed.
            self.connection.rollback()
        except Exception as rollback_error:
            print(rollback_error)
        QMessageBox.about(window, "Warning", "Database Error !!!")
        return False
import os
import sys

root_path = "/home/ubuntu/answerbot-tool/src"
# The project root has to be on sys.path before the project-local import below.
sys.path.append(root_path)

from gensim.models.word2vec import Word2Vec, LineSentence
from utils.time_utils import get_current_time

corpus_fpath = '../_1_preprocessing/corpus.txt'

# A single formatted argument prints the same text under Python 2 and 3.
print('start time :  %s' % (get_current_time(),))
sentences = LineSentence(corpus_fpath)
print("begin training...")
# size is the dimensionality of the feature vectors.
# window is the maximum distance between the current and predicted word within a sentence.
# min_count = ignore all words with total frequency lower than this.
# workers = use this many worker threads to train the model (=faster training with multicore machines).
# NOTE(review): `size` and `iter` are the gensim < 4.0 keyword names
# (`vector_size` / `epochs` from 4.0 on) -- confirm the installed version.
model = Word2Vec(sentences, size=200, window=5, min_count=0, workers=4, iter=100)
model.save('model')
print('end time :  %s' % (get_current_time(),))
def login(self, request):
    """Login endpoint.

    Checks the verification code (when one is pending for the client IP),
    then the account state, then the credentials, and returns a ``Response``
    whose payload is ``{'result_code': ..., 'result_data': ...}``.

    ``result_code`` is 0 after a successful login, 2 when the IP/username
    pair is locked, and 1 for every other outcome.
    """
    # NOTE(review): the X-Forwarded-For header is client-supplied and is used
    # verbatim as part of the cache keys below -- confirm that a trusted
    # proxy sets it.
    if 'HTTP_X_FORWARDED_FOR' in request.META:
        request_ip = request.META['HTTP_X_FORWARDED_FOR']
    else:
        request_ip = request.META['REMOTE_ADDR']
    username = request.data.get('username', '')
    password = request.data.get('password', '')
    verify_code = request.data.get('verify_code', '')
    # Allowed number of failures and lock duration (lock_time is multiplied
    # by 60 wherever seconds are needed)
    login_fail_limit_times = settings.LOGIN_FAILED_TIMES_LIMIT
    lock_time = settings.LOCK_TIME
    # Response status code and payload
    result_code = 1
    result_data = {'result_msg': '', 'need_verify': False}
    # Look the user up (None when the username does not exist)
    exist_user = User.objects.filter(username=username).first()
    # When a verification code is pending for this IP, check it first
    if cache.get('verify_%s' % request_ip) is not None:
        result_data['need_verify'] = True
        verify = cache.get('verify_%s' % request_ip)
        # Case-insensitive comparison of the stored and the submitted code
        if str.lower(verify).encode('utf-8') != str.lower(verify_code).encode('utf-8'):
            # Regenerate the verification code
            img, code = gvcode.generate()
            img.save(settings.VERIFY_IMG_PATH)
            cache.set('verify_%s' % request_ip, code, 1 * 24 * 60 * 60)
            result_code = 1
            result_data['result_msg'] = '对不起,验证码错误!'
            # Generate the verification code (a second time; this overwrites
            # the image and the cached code written just above)
            img, code = gvcode.generate()
            img.save(settings.VERIFY_IMG_PATH)
            cache.set('verify_%s' % request_ip, code, 1 * 24 * 60 * 60)
            result_data['verify_url'] = settings.VERIFY_IMG_URL
            return Response({'result_code': result_code, 'result_data': result_data})
    else:
        result_data['need_verify'] = False
    # Verification code passed (or not required): check the login data
    # User does not exist
    if not exist_user:
        result_code = 1
        result_data['result_msg'] = '对不起,该用户不存在!'
    # User is not activated
    elif exist_user.is_active is False:
        result_code = 1
        result_data['result_msg'] = '用户未激活,请联系管理员!'
    # User is locked
    elif cache.get('error_login_lock_%s_%s' % (request_ip, username)):
        # The cached value is the timestamp at which the lock expires
        time_stmap = cache.get('error_login_lock_%s_%s' % (request_ip, username))
        lock_surplus_second = int(time_stmap) - int(time.time())
        result_code = 2
        result_data['result_msg'] = '用户已经锁定,请%s秒后再试。' % lock_surplus_second
        result_data['lock_surplus_second'] = lock_surplus_second
    else:
        # Authenticate the credentials
        user = authenticate(username=username, password=password)
        # Login failed
        if user is None:
            # Failure counter kept per client IP and username
            error_login_data = cache.get('error_login_%s_%s' % (request_ip, username))
            if error_login_data is None:
                result_code = 1
                # The first failure stores 0, so the counter reads one less
                # than the number of failed attempts
                cache.set('error_login_%s_%s' % (request_ip, username), 0, lock_time * 60)
                result_data['result_msg'] = '登录失败,用户名密码错误!'
            else:
                new_error_login_data = int(error_login_data) + 1
                cache.set('error_login_%s_%s' % (request_ip, username), new_error_login_data, lock_time * 60)
                if new_error_login_data < login_fail_limit_times:
                    result_data['result_msg'] = '账户密码错误,再输入%s次用户将会锁定%s分钟。' \
                                                % (login_fail_limit_times - new_error_login_data, lock_time)
                    # Require a verification code once the counter exceeds 1
                    if new_error_login_data > 1:
                        result_data['need_verify'] = True
                else:
                    # Lock the user: cache the expiry timestamp for lock_time * 60 seconds
                    cache.set('error_login_lock_%s_%s' % (request_ip, username),
                              int(time.time()) + lock_time * 60, lock_time * 60)
                    lock_surplus_second = lock_time * 60
                    result_code = 2
                    result_data['result_msg'] = '用户已经锁定,请%s秒后再试。' % lock_surplus_second
                    result_data['lock_surplus_second'] = lock_surplus_second
        # Login succeeded
        else:
            # user.backend = 'django.contrib.auth.backends.ModelBackend'  # set the default authentication backend
            # login(request, user)
            # Token login: drop any existing token and issue a fresh one
            Token.objects.filter(user=user).delete()
            token, _ = Token.objects.get_or_create(user=user)
            # Save the user's login information
            user_obj = User.objects.get(id=exist_user.id)
            user_obj.last_login = time_utils.get_current_time()
            user_obj.is_online = 1
            # user_obj.session_id = request.session.session_key  # session_key only exists after login() has completed
            user_obj.login_times += 1
            user_obj.save()
            result_code = 0
            # Clear the pending verification code and the failure counter
            cache.delete('verify_%s' % request_ip)
            cache.delete('error_login_%s_%s' % (request_ip, username))
            result_data['need_verify'] = False
            # result_data['sessionid'] = user_obj.sessionid
            result_data['csrftoken'] = get_token(request)
            result_data['token'] = token.key
    # Generate a verification code when the next attempt needs one
    if result_data["need_verify"]:
        img, code = gvcode.generate()
        img.save(settings.VERIFY_IMG_PATH)
        cache.set('verify_%s' % request_ip, code, 1 * 24 * 60 * 60)
        result_data['verify_url'] = settings.VERIFY_IMG_URL
    else:
        cache.delete('verify_%s' % request_ip)
    return Response({'result_code': result_code, 'result_data': result_data})
def forward_to_kolly(msg):
    """Route an incoming group message and return the reply text, if any.

    * Group messages that do not @-mention the bot are ignored.
    * In the two order-query groups, the first matching brand keyword
      triggers an order query whose result is logged and returned; the exact
      text ``draft`` returns the article drafts joined by newlines; anything
      else yields no reply.
    * In the two early-rising groups, ``#早起打卡`` records a check-in
      (accepted between 05:00 and 06:45, once per day) and returns a status
      message; other text returns an "unrecognised command" reply.
    * Messages from any other sender are forwarded to ``user_kolly``.

    :param msg: incoming message object
    :return: reply string, or ``None`` when nothing is sent back
    """
    # Wait a random few seconds to avoid tripping risk control
    sleep(random.randint(1, 3))

    # Group chat without an @-mention: do not reply
    if isinstance(msg.chat, Group) and not msg.is_at:
        return

    sender_name = msg.sender.name
    # Every remaining route starts by logging the message
    logger.info('收到群聊「{}」「{}」的消息:{}'.format(sender_name, msg.member.name, msg.text))

    if sender_name in ('##小号##', '极致科创-大雄聊数码对接群'):
        # Checked in order: the day-specific keywords must come before the
        # bare brand names they contain.
        brand_commands = (
            (('OPPO7', 'oppo7', 'Oppo7'), 'oppo', 7),
            (('VIVO7', 'vivo7', 'Vivo7'), 'vivo', 7),
            (('OPPO14', 'oppo14', 'Oppo14'), 'oppo', 14),
            (('VIVO14', 'vivo14', 'Vivo14'), 'vivo', 14),
            (('OPPO30', 'oppo30', 'Oppo30'), 'oppo', 30),
            (('VIVO30', 'vivo30', 'Vivo30'), 'vivo', 30),
            (('OPPO', 'oppo', 'Oppo'), 'oppo', None),
            (('VIVO', 'vivo', 'Vivo'), 'vivo', None),
        )
        text = msg.text
        for keywords, brand, days in brand_commands:
            if not any(keyword in text for keyword in keywords):
                continue
            if days is None:
                ret = sync_data.query_brand_order(brand)
            else:
                ret = sync_data.query_brand_order2(brand, days)
            logger.info(ret)
            return ret
        if 'draft' == text:
            # List all article drafts
            return '\n'.join(auto.query_article_draft())
        return

    if sender_name in ('内测', '「蓝猫」早起俱乐部'):
        if '#早起打卡' not in msg.text:
            return '不识别的指令'
        current_date = time_utils.get_today_date()
        current_time = time_utils.get_current_time()
        if current_time < '05:00' or current_time > '06:45':
            return '「' + msg.member.name + '」,请在早上 05:00 到 06:45 之间打卡~'
        todays_rows = early_check_dao.query_early_check_rows_by_user_name_and_date(
            msg.member.name, time_utils.get_today_date())
        if len(todays_rows) > 0:
            # Already checked in today: only report the progress
            checked_days = len(
                early_check_dao.query_early_check_rows_by_user_name(msg.member.name))
            return '「' + msg.member.name + '」,你今天打卡过了~ 当前打卡进度' + str(checked_days) + '/30'
        early_check_dao.add_early_check(msg.member.name, current_date, current_time)
        checked_days = len(
            early_check_dao.query_early_check_rows_by_user_name(msg.member.name))
        return '恭喜「' + msg.member.name + '」早起打卡成功,当前打卡进度' + str(checked_days) + '/30,继续努力哦~'

    # Any other sender: forward the message to kolly
    msg.forward(user_kolly,
                prefix='群聊「' + msg.sender.name + '」的「' + msg.member.name + '」发送内容:')
def save_model_record(self, name, me1, me10, me11, me12, me13, me14, me15, me16, me2, me3, me4, me5, me6,
                      me7, me8, me9, description, param1, param2, param3, type, time_series_ID,
                      window=None):
    """
    Create a new model record.

    All values are bound as query parameters, so quotes or backslashes in the
    text fields can no longer break or alter the statement.

    NOTE(review): ``me1`` .. ``me16`` and ``time_series_ID`` used to be
    spliced into the SQL unquoted. They are now bound as plain values, so
    callers must pass Python values rather than SQL fragments -- confirm the
    call sites.

    :param name: stored as text in the ``name`` column
    :param me1: value for the ``me1`` column
    :param me10: value for the ``me10`` column
    :param me11: value for the ``me11`` column
    :param me12: value for the ``me12`` column
    :param me13: value for the ``me13`` column
    :param me14: value for the ``me14`` column
    :param me15: value for the ``me15`` column
    :param me16: value for the ``me16`` column
    :param me2: value for the ``me2`` column
    :param me3: value for the ``me3`` column
    :param me4: value for the ``me4`` column
    :param me5: value for the ``me5`` column
    :param me6: value for the ``me6`` column
    :param me7: value for the ``me7`` column
    :param me8: value for the ``me8`` column
    :param me9: value for the ``me9`` column
    :param description: stored as text in the ``description`` column
    :param param1: stored as text in the ``param1`` column
    :param param2: stored as text in the ``param2`` column
    :param param3: stored as text in the ``param3`` column
    :param type: stored as text in the ``type`` column
    :param time_series_ID: value for the ``time_series_ID`` column
    :param window: parent widget for the warning dialog; ``None`` (the
        default) shows the dialog without a parent. The default is no longer
        a ``QMessageBox`` built when the method is defined.
    :return: ``True`` when the row was inserted and committed, ``False`` when
        a database step failed
    """
    columns = (
        "created_at", "me1", "me10", "me11", "me12", "me13", "me14", "me15", "me16",
        "me2", "me3", "me4", "me5", "me6", "me7", "me8", "me9",
        "description", "name", "param1", "param2", "param3", "type", "time_series_ID",
    )
    # One "%s" placeholder per column; assumes a MySQL DB-API driver with the
    # "%s" paramstyle (PyMySQL/MySQLdb).
    query = "INSERT INTO fpat_db.fpat_models ({columns}) VALUES({placeholders});".format(
        columns=", ".join(columns),
        placeholders=", ".join(["%s"] * len(columns)),
    )
    try:
        # Same order as ``columns``. str() keeps the stored text identical to
        # what the old quoted literals rendered.
        params = (
            get_current_time(),
            me1, me10, me11, me12, me13, me14, me15, me16,
            me2, me3, me4, me5, me6, me7, me8, me9,
            str(description), str(name), str(param1), str(param2), str(param3), str(type),
            time_series_ID,
        )
        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
        self.connection.commit()
        return True
    except Exception as ex:
        print(ex)
        QMessageBox.about(window, "Warning", "Database Error !!!")
        return False
def save_answers(self, answer, question, dimension, evaluation_id, window=None):
    """
    Save one answer of the evaluation questions and link it to its evaluation.

    The answer row and the evaluation/answer mapping row are written in one
    transaction: both are committed together, and the transaction is rolled
    back if either insert fails, so no unmapped answer row is left behind.

    NOTE(review): ``answer``, ``dimension`` and ``evaluation_id`` used to be
    spliced into the SQL unquoted. They are now bound as plain values, so
    callers must pass Python values rather than SQL fragments -- confirm the
    call sites.

    :param answer: value for the ``answer`` column
    :param question: stored as text in the ``question`` column
    :param dimension: value for the ``dimension`` column
    :param evaluation_id: id of the evaluation the answer belongs to
    :param window: parent widget for the warning dialog; ``None`` (the
        default) shows the dialog without a parent. The default is no longer
        a ``QMessageBox`` built when the method is defined.
    :return: ``True`` on success, ``False`` when a database step failed
    """
    # Assumes a MySQL DB-API driver with the "%s" paramstyle (PyMySQL/MySQLdb).
    answer_query = (
        "INSERT INTO fpat_evaluation_answers( created_at, answer, dimension, question)"
        " VALUES(%s, %s, %s, %s);"
    )
    mapping_query = (
        "INSERT INTO fpat_db.fpat_evalutions_evalution_answers (fpat_evalutions_ID,"
        " evalution_answers_ID) VALUES(%s, %s);"
    )
    try:
        with self.connection.cursor() as cursor:
            # str() keeps the stored question text identical to what the old
            # quoted literal rendered.
            cursor.execute(answer_query, (get_current_time(), answer, dimension, str(question)))
            answer_id = cursor.lastrowid
            cursor.execute(mapping_query, (evaluation_id, answer_id))
        self.connection.commit()
        return True
    except Exception as ex:
        print(ex)
        try:
            # Undo the answer insert when the mapping insert failed.
            self.connection.rollback()
        except Exception as rollback_error:
            print(rollback_error)
        QMessageBox.about(window, "Warning", "Database Error !!!")
        return False