def get_price():
    """Read up to three price values from the item detail page.

    Returns a ``(price, price2, price3)`` tuple of strings; empty strings
    stand in for prices that are absent or members-only.
    """
    first, second, third = "", "", ""
    try:
        # Members-only pricing hides the real numbers entirely.
        members_only = poco(
            "com.alibaba.wireless:id/price_private_tip_container")
        if members_only.exists():
            logger.warning('Member price only.')
            return "", "", ""
        # A single price range is reported as the first value only.
        range_node = poco("com.alibaba.wireless:id/current_range")
        if range_node.exists():
            return range_node.get_text(), "", ""
        # Otherwise up to three tiered prices may be present.
        tier1 = poco("com.alibaba.wireless:id/textView1")
        if tier1.exists():
            first = tier1.get_text()
            tier2 = poco("com.alibaba.wireless:id/textView2")
            if tier2.exists():
                second = tier2.get_text()
            tier3 = poco("com.alibaba.wireless:id/textView3")
            if tier3.exists():
                third = tier3.get_text()
    except Exception as e:
        capture_error(e)
        print("get_price")
    logger.info("已读取价格信息。")
    return first, second, third
def wrapper(*args, **kw):
    """Invoke the wrapped ``func`` and log how long the call took."""
    began = time.time()
    outcome = func(*args, **kw)
    logger.info("页面读取数据耗时 = %s" % format_time(time.time() - began))
    return outcome
def get_newest_info(secret_key):
    """Scan the JWC notice list page and forward matching notices.

    Only rows whose title starts with '【实践科】' and whose date equals
    2018-11-09 are fetched in detail and pushed via ``submit_info``.

    :param secret_key: credential passed through to ``submit_info``.
    """
    response = requests.get('http://jwc.njupt.edu.cn/1594/list.htm',
                            headers=headers)
    # requests defaults to ISO-8859-1 for this page; it is really UTF-8.
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    for row in tree.xpath('//*[@id="newslist"]/div/div/table/tr'):
        title = row.xpath('td/table/tr/td[1]/a/text()')[0]
        href = row.xpath('td/table/tr/td[1]/a/@href')[0]
        posted_on = row.xpath('td/table/tr/td[2]/div/text()')[0]
        if not (title.startswith('【实践科】') and posted_on == "2018-11-09"):
            continue
        if href.endswith('.htm'):
            detail = getDetailpage(href)
            submit_info(secret_key, {'text': title, 'desp': detail})
            logger.info('完成发送')
def get_share_and_snap(title, table):
    """Open the share sheet for the current item and collect share data.

    :param title: item title, used by snap_exists() to decide whether a
                  screenshot is still needed.
    :param table: database table name consulted by snap_exists().
    :return: (share_text, snap_filename) — share_text is '' on failure;
             snap_filename is None when the item is already recorded.
    """
    share_text = ''
    snap_filename = None
    try:
        # Tap the share button on the detail page.
        share_btn = poco("com.alibaba.wireless:id/iv_detail_shared")
        share_btn.wait_for_appearance(5)
        share_btn.click()
        # The "copy token" button only works after the QR code has rendered,
        # so wait for the QR image nodes inside the share WebView first.
        QR_obj = \
            poco("android:id/content").child("android.widget.FrameLayout").offspring(
                "android.webkit.WebView").child(
                "android.view.View").child("android.view.View")[0].child("android.view.View").offspring(
                type="android.widget.Image")
        poco.wait_for_all(list(QR_obj), timeout=20)
        sleep(0.5)
        # Screenshot: only newly-crawled items (not yet in the database)
        # need a snapshot; the file name is returned to the caller.
        if not snap_exists(title, table):
            snap_filename = get_goods_snapshot(SNAP_PATH)
        else:
            snap_filename = None
        # Locate and tap the "copy token" entry in the share channel list.
        copy_btn = poco("android:id/content").child(
            "android.widget.FrameLayout").offspring(
            "com.alibaba.wireless:id/dynamic_share_channel_layout"
        ).offspring("com.alibaba.wireless:id/dynamic_share_recycler_view"
                    ).child("android.widget.LinearLayout")[0].child(
            "android.widget.LinearLayout").child(
            name="com.alibaba.wireless:id/item_name")
        copy_btn.wait_for_appearance()
        copy_btn.click()
        # Read the share token back from the device clipboard via adb.
        output = exec_cmd(adb_get_clipboard)
        share_text = parse_outpost(output)
        logger.info("读取分享口令")
    except Exception as e:
        capture_error(e)
        print("get_share_text")
    return share_text, snap_filename
def get_logistics():
    """Read the logistics (shipping) city and price from the detail page.

    :return: (logistics_city, logistics_price) — defaults to ("无", 0)
             when a widget is absent or an error occurs.

    Bug fix: the original fell through after an exception and implicitly
    returned None, which broke callers that unpack two values (e.g.
    ``logistics_city, logistics_price = get_logistics()``). Defaults are
    now returned on every path.
    """
    logistics_city = "无"
    logistics_price = 0
    try:
        logistics_city_object = poco(
            "com.alibaba.wireless:id/qx_logistics_city_txt")
        logistics_price_object = poco(
            "com.alibaba.wireless:id/qx_logistics_price_txt")
        if logistics_city_object.exists():
            logistics_city = logistics_city_object.get_text()
        if logistics_price_object.exists():
            logistics_price = logistics_price_object.get_text()
        logger.info("已读取物流信息。")
    except Exception as e:
        capture_error(e)
        print("get_logistics")
    return logistics_city, logistics_price
def submit(request):
    """Handle a fill-in-the-blank code submission (Django view).

    For every blank position, splice the student's answer into a copy of
    the original token stream, write the assembled ``.cpp`` file to the
    submissions directory, then record a Submission row and redirect to
    the status list. Unauthenticated users get the upload page instead.

    :param request: Django HttpRequest; expects POST answer fields and a
                    numeric ``proId`` query parameter.
    """
    if 'checkSignin' in request.session and request.session['checkSignin']:
        email = request.session['email']
        # Collect the answers in form-field order, one per blank.
        submission = [request.POST.get(key) for key in request.POST.keys()]
        # Security fix: proId comes from the untrusted query string and was
        # previously passed to eval(); parse it as an integer instead.
        proId = int(request.GET.get('proId'))
        jsonDataPath = 'data\\jsonData\\%s.json' % str(proId)
        with open(jsonDataPath) as f:
            X0 = (json.load(f))[0]
        raw_tokens = [item[0] for item in X0]
        problemInfo = Problem.objects.values('answer', 'blanks').filter(id=proId)[0]
        blanks_lst = json.loads(problemInfo['blanks'])
        submissionId = Submission.get_next_submission_id()
        logger.info(submission)
        for i in range(len(blanks_lst)):
            temp = raw_tokens[:]
            submissionPath = 'data\\submissions\\%d-%d-%d.cpp' % (submissionId, proId, i + 1)
            temp[blanks_lst[i]] = submission[i]
            # Re-assemble the token stream into compilable source text.
            assembleContent = ''.join(item + ' ' for item in temp)
            with open(submissionPath, 'w') as f:
                logger.info('\n' + clean_c_style(assembleContent) + '\n')
                f.write(clean_c_style(assembleContent))
            logger.info(raw_tokens)
            logger.info(temp)
        try:
            dbSubmission = Submission(submissionId=submissionId,
                                      judgeStatus=-2,
                                      proId=proId,
                                      email=email,
                                      answer=json.dumps(submission))
            dbSubmission.save()
        except IOError as e:
            logger.error(e)
            logger.error('IO Error occurs while saving a new Submission %d' % submissionId)
        finally:
            # NOTE: returning from finally preserves the original behavior of
            # always redirecting, even if save() raised something unexpected.
            return redirect('/getStatusList?volume=1')
    else:
        # request.session['errmsg'] = 'Please Login First'
        return render(request, 'upload.html', {})
def transform(vector: list, id: int, difficulty):
    """Turn a token ``vector`` into (program, problem, blanks_lst, answer_lst).

    If the model marked zero blanks, fall back to randomly blanking about
    10% of the tokens in the middle half of the vector and re-assemble.

    Fix: the annotation ``vector: []`` was a list *instance*, not a type;
    it is now ``list``. The parameter name ``id`` (shadowing the builtin)
    is kept for interface compatibility.

    :param vector: token rows; row[3] is the blank marker slot ('B' = blank).
                   Mutated in place when the random fallback runs.
    :param id: problem id, used only for log messages.
    :param difficulty: forwarded to assemble().
    """
    program, problem, blanks_lst, answer_lst = assemble(vector, difficulty=difficulty)
    if len(blanks_lst) == 0:
        logger.info('Model generates ZERO blank for problem %d!' % id)
        # Random fallback: mark middle-half tokens as blanks. Duplicate
        # indices are possible, so fewer than len(vector)//10 distinct
        # blanks may result.
        for _ in range(len(vector) // 10):
            randId = random.randint(int(len(vector) * 1 / 4),
                                    int(len(vector) * 3 / 4))
            vector[randId][3] = 'B'
        program, problem, blanks_lst, answer_lst = assemble(
            vector, difficulty=difficulty)
        logger.info('System generates %d blanks randomly for problem %d.'
                    % (len(blanks_lst), id))
    else:
        logger.info('Model generates %d blanks for problem %d.'
                    % (len(blanks_lst), id))
    program = clean_c_style(program)
    problem = clean_c_style(problem)
    return program, problem, blanks_lst, answer_lst
def teacher(request):
    """Teacher portal view: dashboard, logout, signin, and problem upload.

    Dispatch (method, ?action=):
      GET  None     -> render dashboard with the teacher's repository and
                       a per-theme problem count.
      GET  logout   -> clear teacher session keys, redirect to ?next.
      POST signin   -> check credentials, populate session, redirect.
      POST upload   -> ingest a problem file, generate blanks via the
                       model pipeline, unpack test cases, save Problem.

    :param request: Django HttpRequest.
    :return: HttpResponse / redirect (note: the upload branch returns
             False on a duplicate test-case directory — presumably a bug,
             Django expects an HttpResponse; TODO confirm with callers).
    """
    if request.method == 'GET':
        if request.GET.get('action') is None:
            repository = []
            # Only a signed-in teacher gets their repository listing.
            if 'checkTeacherSignin' in request.session and request.session['checkTeacherSignin']:
                email = request.session['teacherEmail']
                for item in Teacher.get_my_repository(email=email, volume=1):
                    # Stringify for template rendering / JSON safety.
                    item['averageScore'] = str(item['averageScore'])
                    item['score'] = str("%.2f" % item['score'])
                    repository.append(item)
            # Count problems per human-readable theme name.
            problemDB = Problem.objects.values('theme')
            problemDBInfo = {}
            from Model.themes import themes
            for problem in problemDB:
                # 'theme' is a comma-separated list of theme codes.
                themeList = (problem['theme']).split(',')
                for theme in themeList:
                    theme = themes[theme]
                    if theme in problemDBInfo.keys():
                        problemDBInfo[theme] += 1
                    else:
                        problemDBInfo[theme] = 1
            return render(request, 'teacher_index.html',
                          {'repository': repository,
                           'problemDBInfo': problemDBInfo})
        elif request.GET.get('action') == 'logout':
            # Clear only the teacher-related session keys.
            nextURL = request.GET.get('next')
            request.session['checkTeacherSignin'] = False
            request.session['teacherEmail'] = ''
            request.session['teacherNickname'] = ''
            return redirect(nextURL)
    elif request.method == 'POST':
        if request.GET.get('action') == 'signin':
            email = request.POST.get('email')
            password = request.POST.get('password')
            exist = Teacher.objects.filter(email=email).exists()
            nextURL = request.GET.get('next')
            if exist:
                teacher = Teacher.objects.all().filter(email=email)[0]
                if check_password(password, teacher.password):
                    # Mark the session as a signed-in teacher.
                    request.session['checkTeacherSignin'] = True
                    request.session['teacherEmail'] = teacher.email
                    request.session['teacherNickname'] = teacher.nickname
                    repository = []
                    for item in Teacher.get_my_repository(email=email, volume=1):
                        item['averageScore'] = str(item['averageScore'])
                        item['score'] = str("%.2f" % item['score'])
                        repository.append(item)
                    request.session['repository'] = json.dumps(repository)
                    return redirect(nextURL)
                else:
                    # The password is wrong.
                    return redirect(nextURL)
            else:
                # The account does not exist.
                return redirect(nextURL)
        elif request.GET.get('action') == 'upload':
            title, themes, description, score, author = \
                request.POST.get('title'), \
                request.POST.get('themes'), \
                request.POST.get('description'), \
                int(request.POST.get('score')), \
                request.session['teacherEmail']
            themes_ = themes.split(',')
            id = Problem.get_next_problem_id()
            origin_filename = "%s-origin%s" % (str(id), '.cpp')
            problem_filename = "%s%s" % (str(id), '.cpp')
            jsonData_filename = "%s%s" % (str(id), '.json')
            problem_file_obj = request.FILES.get('problem-file')
            origin_file_path = os.path.join('data\\problem', origin_filename)
            problem_file_path = os.path.join('data\\problem', problem_filename)
            # Persist the uploaded original source file chunk by chunk.
            f = open(origin_file_path, mode="wb")
            for i in problem_file_obj.chunks():
                # TODO(tdye): using coroutine?
                f.write(i)
            f.close()
            # Vectorize the source, cache the vector as JSON for later reuse.
            X = program2vector.transform(origin_file_path)
            jsonDataPath = os.path.join('data\\jsonData', jsonData_filename)
            with open(jsonDataPath, mode='w', encoding='utf-8') as f:
                json.dump(X, f)
            # Append one theme-code row per selected theme before predicting.
            for theme_ in themes_:
                X[0].append(["", "", int(theme_), "O"])
            X0 = predict.predict(X, themes_)[0]
            difficulty = int(request.POST.get('difficulty'))
            # Generate the blanked problem text and answers from the vector.
            program, problem, blanks_lst, answer_lst = vector2program.transform(X0, id, difficulty=difficulty)
            logger.info('\nOrigin file %d:\n%s' % (id, program))
            logger.info('\nProblem file %d:\n%s' % (id, problem))
            logger.info('\nBlanks %d:\n%s' % (id, blanks_lst))
            logger.info('\nAnswer %d:\n%s' % (id, answer_lst))
            with open(problem_file_path, mode='w') as f:
                f.write(problem)
            # The origin file was only needed for vectorization.
            os.remove(origin_file_path)
            # Unpack the uploaded test-case archive into data/test_cases/<id>.
            test_cases_obj = request.FILES.get('test-cases')
            if not os.path.exists("%s%s" % ('data/test_cases/', str(id))):
                os.mkdir("%s%s" % ('data/test_cases/', str(id)))
            else:
                print('directory already exists')
                logger.error('Directory -%s%s already exists' % ('data/test_cases/', str(id)))
                return False
            test_cases_path_rar = os.path.join('data/test_cases', str(id), str(id) + '.zip')
            test_cases_path = os.path.join('data/test_cases', str(id))
            f = open(test_cases_path_rar,
                     mode="wb")
            for i in test_cases_obj.chunks():
                # TODO(tdye): using coroutine?
                f.write(i)
            f.close()
            unzip_file(test_cases_path_rar, test_cases_path)
            os.remove(test_cases_path_rar)
            # update database
            try:
                db_problem = Problem(id=id, title=title, theme=themes,
                                     description=description, author=author,
                                     score=score,
                                     answer=json.dumps(answer_lst),
                                     blanks=json.dumps(blanks_lst))
                db_problem.save()
            except ValueError:
                print("Invalid parameters => (%d, %s, %s, %s, %s, ) while saving a problem!" % (
                    id, title, themes, description, author))
                logger.error("Invalid parameters => (%d, %s, %s, %s, %s, ) while saving a problem!" % (
                    id, title, themes, description, author))
            finally:
                # Stash generation results in the session for the next page.
                request.session['program'] = program
                request.session['problem-id'] = id
                request.session['problem'] = problem
                request.session['answer'] = answer_lst
                return redirect('/generation')
def crawl_pages(self):
    """Walk every result page: refresh, hover for prices, parse, persist.

    Stops after the last page, or after a single page when DEBUG is set.
    """
    page_parser = HtmlParser(self.driver)   # page parsing helper
    saver = Saver()                         # persistence helper
    iteration = 0                           # debug-only loop counter
    while True:
        iteration += 1
        logger.info("Preparing to parse page ({0}/{1})".format(
            self.current_page, self.page_counts))
        # Make sure the page actually shows data, then scroll it fully out.
        self.refresh()
        logger.info(" Refresh current page.")
        self.goto_page_bottom()
        # Hovering over each item reveals its price data.
        logger.info(" Hover current page.")
        self.hover_all(0.5)
        logger.info(" Parse current page.")
        page_records = page_parser.get_page_data()
        logger.info("Saving to the database.")
        saver.to_db(page_records)
        if DEBUG and iteration >= 1:
            break  # debug runs stop after one page
        if self.current_page == self.page_counts:
            break  # reached the final page
        logger.info("Scroll to the next page.")
        self.scroll_page()
        logger.info("Wait and delay.")
        sleep(5)
def get_detail_data():
    """Scrape one item's detail page into a record and persist it.

    Collects: title, three price tiers, logistics, share token, optional
    snapshot filename, trade history rows, and ten seller-info fields,
    then writes the record via save_crawler(). All failures are routed
    through capture_error().
    """
    # crawler_record = init_crawler_record()
    crawler_record = {}  # accumulator for the crawled record
    trade_data = []
    seller_info = ()
    try:
        product_object = poco("com.alibaba.wireless:id/tv_detail_subject")
        product = product_object.get_text()
        product = product.strip(' ')
        logger.info("扫描商品 = {}".format(product))
        price1, price2, price3 = get_price()
        logistics_city, logistics_price = get_logistics()
        # Share token and (for new items) a product screenshot.
        share_text, snap_filename = get_share_and_snap(product, TABLE)
        crawler_record['share_text'] = share_text
        if snap_filename is not None:
            crawler_record['snapshot'] = snap_filename
        check_page_no = 0           # current scroll page within the detail view
        trade_info_checked = False  # whether trade info was already scanned
        seller_info_checked = False # whether seller info was already scanned
        # Scroll up to three pages looking for the key section headers.
        while check_page_no <= 2:
            headers = find_key_info()
            if not trade_info_checked:
                if headers[TRADE_INFO]:
                    # Header found but possibly only partially visible:
                    # scroll it toward the top before interacting.
                    name, pos, key_obj = headers[TRADE_INFO]
                    if not object_in_view(TRADE_INFO, pos):
                        print("找到部分交易数据,翻动到顶部")
                        scroll_to_top(pos, top=0.2)
                        # Pause after scrolling, otherwise taps can miss.
                        sleep(0.5)
                    logger.info("读取交易信息")
                    # Tap the detail button and read the trade rows.
                    trade_data = get_trade_info()
                    trade_info_checked = True
                    headers = find_key_info()
            if not seller_info_checked:
                if headers[SELLER_INFO]:
                    name, pos, key_obj = headers[SELLER_INFO]
                    if not object_in_view(SELLER_INFO, pos):
                        scroll_to_top(pos, top=0.3)
                    logger.info("读取厂家信息")
                    seller_info = get_seller_info()
                    seller_info_checked = True
            # Both sections scanned: done.
            if trade_info_checked and seller_info_checked:
                break
            # Seller section appeared before any trade info: the page has
            # no trade section, so stop scanning.
            if (trade_info_checked is False) and (seller_info_checked is True):
                break
            check_page_no += 1
            scroll_detail_page()  # scroll one full page
        # Assemble the crawled record.
        crawler_record['title'] = product
        crawler_record['share_text'] = share_text
        # crawler_record['snapshot'] = snap_filename
        crawler_record['price1'] = price1
        crawler_record['price2'] = price2
        crawler_record['price3'] = price3
        crawler_record['logistics_city'] = logistics_city
        crawler_record['logistics_price'] = logistics_price
        # Store the trade rows under keys trade1, trade2, ...
        for i, trade in enumerate(trade_data):
            trade_keyword = 'trade' + str(i + 1)
            crawler_record[trade_keyword] = trade
        # Unpack the ten seller-info fields. NOTE(review): if seller info
        # was never read, seller_info is () and this raises, landing in
        # the except below — presumably intentional best-effort behavior.
        crawler_record['company'], crawler_record['years'], crawler_record['back_rate'], crawler_record['buyer'], \
            crawler_record['desc'], crawler_record['respo'], crawler_record['delivery'], crawler_record['sign_desc'], \
            crawler_record['sign_respo'], crawler_record['sign_delivery'] = seller_info
        # if crawler_record['desc'] is None:
        #     crawler_record['desc'] = ''
        # if crawler_record['respo'] is None:
        #     crawler_record['respo'] = ''
        # if crawler_record['delivery'] is None:
        #     crawler_record['delivery'] = ''
        save_crawler(crawler_record, TABLE)
    except Exception as e:
        capture_error(e)
        print("get_detail_data")