class CmonitorCli(object):
    """CLI entry point: wires the system, DB and monitor services to a
    websocket client and runs the background scheduler.

    Usable as a context manager; the websocket is closed on exit.
    """

    def __init__(self):
        self.system = SystemService()
        self.db = DbService()
        self.hostname = self._get_hostname()
        self.monitor_service = MonitorService(self.hostname)
        self.scheduler = SchedulerService()
        self.scheduler.run_scheduler()
        self.scheduler.add_db_cleanup_task()
        # Build the endpoint once instead of formatting it twice.
        url = "{}/ws/monitor/{}/{}/".format(WEBSOCKET_SERVER, ACC_USERNAME,
                                            self.hostname)
        logger.info("connect to: {}".format(url))
        self.wsocket = WsClient(url,
                                func_onopen=self.scheduler.add_update_job,
                                func_onmsg=self.watcher,
                                func_onclose=self.scheduler.delete_update_job,
                                func_report=self.updater,
                                func_cli_upd=self.cli_updater)
        self.wsocket.connect()

    def __enter__(self):
        # BUG FIX: previously returned None, so `with CmonitorCli() as c:`
        # bound c to None.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        logger.info("Exit")
        self.wsocket.close()

    def _get_hostname(self):
        """Return the OS hostname, falling back to a DB-stored (or freshly
        generated and persisted) random name when the OS has none."""
        os_hostname = self.system.get_hostname()
        if os_hostname[0] is None or len(os_hostname[0]) == 0:
            # BUG FIX: the DB was queried twice and the *second* call was
            # tested for None instead of the value already fetched.
            hostname = self.db.get_hostname()
            if hostname is None:
                hostname = rnd_servname(5)
                self.db.write_hostname(hostname)
            return hostname
        return os_hostname[0]

    # The three callbacks below receive `ws` from the WsClient initializer
    # in socket_client.py.

    def watcher(self, ws, content):
        """Handle an incoming websocket message and send back the response."""
        result_json = self.monitor_service.watch_message(content)
        if result_json is not None:
            ws.send(result_json)
            logger.debug("Send resp: {}".format(result_json))

    def updater(self, ws):
        """Send a periodic status report over the websocket."""
        result_json = self.monitor_service.report_status()
        if result_json is not None:
            ws.send(result_json)
            logger.debug("Sent update: {}".format(result_json))

    def cli_updater(self, ws):
        """Send a CLI self-update request for this host, if one is pending."""
        msg = cli_update_request(self.hostname)
        if msg is not None:
            ws.send(json.dumps(msg, default=str))
def check_photo(photo_info):
    """Validate a resume photo and return a recommendation dict, or False.

    photo_info: dict containing a 'medium' photo URL, or None when the
                resume has no photo.

    Returns a recommendation dict (title/text/type) when something is wrong
    with the photo, False when the photo looks fine.
    """
    if photo_info is None:
        db_service = DbService()
        # Stats are only needed for the "no photo" message, so they are
        # computed in this branch only.  `total` was previously named `all`,
        # shadowing the builtin.
        total = db_service.execute_script(
            'SELECT count(have_photo) FROM resume')
        have_photo = db_service.execute_script(
            'SELECT count(have_photo) FROM resume WHERE have_photo = TRUE ')
        # Guard against an empty resume table (division by zero).
        if total[0][0]:
            percentage = int(have_photo[0][0] / total[0][0] * 100) - 1
        else:
            percentage = 0
        return {
            'title': 'Отсутствует фотография',
            'text': 'Фотография в резюме всегда будет плюсом, даже если должность не предполагает общения с людьми.'
            ' Фото сделает резюме персонализированным: ее легче заметить, а отклик с фото сложнее пропустить'
            ' или удалить. Но выбирать портрет для резюме нужно критически: неудачное фото может все испортить.'
            ' Лучше всего подойдет профессиональное портретное фото, деловое и нейтральное.',
            'sub_text': f'На данный момент около {percentage}% всех резюме публикуются с фотографией!',
            'type': 'warning'
        }

    face_cascade = cv2.CascadeClassifier(
        '..//workflow/haarcascade_frontalface_default.xml')
    # NOTE(review): no HTTP status check here — a failed download surfaces as
    # a cv2 decode error; confirm whether that is acceptable.
    req = requests.get(photo_info['medium'])
    arr = np.asarray(bytearray(req.content), dtype=np.uint8)
    img = cv2.imdecode(arr, -1)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    if len(faces) == 0:
        return {
            'title': 'Не найдено лицо на фотографии',
            'text': 'Мы не смогли распознать вас на фотографии. Убедитесь, что ваше лицо хорошо'
            ' различимо и не закрыто элементами одежды.',
            'type': 'warning'
        }
    if len(faces) > 1:
        return {
            'title': 'Более одного лица на фотографии',
            'text': 'Нам удалось распознать несколько лиц на вашей фотографии. Групповые фото не подходят для '
            'резюме, убедитесь, что на фото изображены только вы.',
            'type': 'danger'
        }
    return False
def get_vacancy_title_recommendation(title):
    """Suggest up to four popular vacancy names similar to *title*.

    Returns a recommendation dict, or False when the title yields no usable
    tokens or no relevant vacancy names were found.
    """
    # Keep only latin/cyrillic lowercase letters, split into unique tokens.
    # (The original also had a no-op `title_tokens.replace(' ', ' ')` whose
    # result was discarded — strings are immutable, so it did nothing.)
    normalized = re.sub('[^a-zа-я]', ' ', title.lower())
    title_tokens = {token for token in normalized.split(' ') if token != ''}
    if len(title_tokens) == 0:
        return False

    # One parameterized LIKE condition per token (values bound separately,
    # never interpolated into the SQL text).
    conditions = ' and '.join('lower(name) like %s' for _ in title_tokens)
    script_text = ('SELECT name, count(id) FROM vacancies WHERE '
                   + conditions
                   + ' GROUP BY name ORDER BY count(id) DESC')

    db_service = DbService()
    vacancies = db_service.execute_script(
        script_text, tuple(f"%{token}%" for token in title_tokens))

    # Take the four most frequent names that are not a token-for-token match
    # of the user's own title.
    relevant_titles = []
    for vacancy in vacancies:
        if len(relevant_titles) == 4:
            break
        vacancy_tokens = set(
            re.sub('[^a-zа-я]', ' ', vacancy[0].lower()).split(' '))
        if vacancy_tokens == title_tokens:
            continue
        relevant_titles.append(vacancy[0])

    if not relevant_titles:
        return False
    return {
        'title': 'Рекомендации к желаемой должности',
        'text': f'При выборе названия для желаемой должности, рекомендуется ориентироваться на конкретную'
        f' интересующую именно вас вакансию определенной компании. Но в том случае если вы еще только в'
        f' поиске желаемой вакансии, то мы можем посоветовать вам следующие наименование должностей, которые'
        f' сейчас востребованы на рынке труда и похожи на желаемую вами должность:',
        'sub_list': relevant_titles,
        'type': 'info'
    }
def system_stats(self, body):
    """Aggregate system-status log entries for the last ``body['days']`` days.

    1 day groups by (hour, minute); more than 1 day groups by
    (month, day, hour).  Returns {'success': True, 'body': {...}} or
    {'success': False, 'body': <error text>} on failure.
    """
    result = []
    try:
        # BUG FIX: the original converted with int() for the DB query but
        # compared the *raw* body['days'] afterwards, so a string "1" fell
        # through both branches (and "2" > 1 raised TypeError on py3).
        days = int(body['days'])
        with DbService() as db_service:
            services_status_log = db_service.get_system_in_n_days(days)
            if days == 1:
                group_key = lambda x: (x[0].hour, x[0].minute)
            else:
                group_key = lambda x: (x[0].month, x[0].day, x[0].hour)
            # NOTE(review): groupby assumes the log rows arrive time-ordered
            # — confirm get_system_in_n_days guarantees that.
            if days >= 1:
                for time, grp in itertools.groupby(services_status_log,
                                                   group_key):
                    result.append(self.parse_stats(time, grp))
        return {
            'success': True,
            'body': {
                'duration': days,
                'data': result
            }
        }
    except Exception as e:
        # Stringify so the response stays JSON-serializable (the exception
        # object itself was returned before).
        return {'success': False, 'body': str(e)}
def get_books_detail(page=1, book_ids=None, filter_by_id=True):
    """Return one page (25 items) of fully-populated book dicts.

    page:         1-based page number.
    book_ids:     iterable of book-id strings, used when filter_by_id is True.
                  (BUG FIX: was a mutable default argument ``[]``.)
    filter_by_id: when False, ignore book_ids and page through all books.

    Returns a list of book dicts; an empty list when the query yields no
    rows (the original returned None here, breaking iterating callers).
    Removed a leftover debug ``print(query)``.
    """
    if book_ids is None:
        book_ids = []
    books = []
    str_book_ids = ",".join(book_ids)
    max_books = page * 25
    skip_books = (page - 1) * 25
    # NOTE(review): with filter_by_id=True and an empty book_ids the id list
    # in the query is empty — confirm the SQL template tolerates that.
    if filter_by_id:
        query = BOOK_DETAILS_BY_ID.format(str_book_ids, max_books)
    else:
        query = BOOK_DETAILS.format(max_books)

    data = DbService.execute_query(query)
    if not data:
        return books

    # The query fetches the first max_books rows; earlier pages are skipped
    # client-side.
    for count, row in enumerate(data, start=1):
        if count <= skip_books:
            continue
        book_id = row[0]
        book = {
            "id": book_id,
            "download_count": row[1],
            "media_type": row[2],
            "title": row[3],
            "authors": get_book_authors(book_id),
            "bookshelves": get_bookshelves(book_id),
            "languages": get_book_languages(book_id),
            "formats": get_book_formats(book_id),
            "subjects": get_book_subject(book_id),
        }
        books.append(book)
    return books
def get_book_ids(query):
    """Run *query* and return the first column of every row as a set of strings."""
    rows = DbService.execute_query(query)
    return {str(row[0]) for row in rows}
def report_status(self, msg_command=MessageCommands.STATUS_CLI_UPDATE):
    """Collect system/codius/extra-service reports, persist them, and return
    a JSON status response.

    On any failure the error is logged and an ERROR response is returned.
    """
    try:
        with SystemService() as system_service:
            system = system_service.report_system_services()
            codius = system_service.report_codius()
            extra_services = system_service.report_extra_services()
        with DbService() as db_service:
            codius['income_24'] = 0
            db_service.write_status(codius, system)
            # Fetch once — the original issued this query twice.
            pods_24h = db_service.get_codiusd_in_n_days(1)
            if len(pods_24h) > 0:
                codius['income_24'], codius['count_24'] = calc_income(
                    pods_24h)
        return result_to_json_response(
            msg_command,
            ResponseStatus.OK,
            self.hostname,
            report_system=system,
            report_codius=codius,
            report_extra_services=extra_services)
    except Exception as e:
        logger.error("Error on command: {} :{}".format(msg_command.name, e))
        return result_to_json_response(msg_command, ResponseStatus.ERROR,
                                       self.hostname, body=e)
def __init__(self):
    """Wire up the system, DB, monitor and scheduler services, then open a
    websocket connection to the monitoring server for this host.

    Side effects: starts the scheduler, registers the DB-cleanup task, and
    connects the websocket immediately.
    """
    self.system = SystemService()
    self.db = DbService()
    # Hostname resolution may generate and persist a random name (see
    # _get_hostname elsewhere in this class).
    self.hostname = self._get_hostname()
    self.monitor_service = MonitorService(self.hostname)
    self.scheduler = SchedulerService()
    self.scheduler.run_scheduler()
    self.scheduler.add_db_cleanup_task()
    logger.info("connect to: {}/ws/monitor/{}/{}/".format(
        WEBSOCKET_SERVER, ACC_USERNAME, self.hostname), )
    # The WsClient dispatches to the callbacks below: watcher for incoming
    # messages, updater for periodic reports, cli_updater for self-update
    # requests; the scheduler jobs are tied to the socket's open/close.
    self.wsocket = WsClient(
        "{}/ws/monitor/{}/{}/".format(WEBSOCKET_SERVER, ACC_USERNAME,
                                      self.hostname),
        func_onopen=self.scheduler.add_update_job,
        func_onmsg=self.watcher,
        func_onclose=self.scheduler.delete_update_job,
        func_report=self.updater,
        func_cli_upd=self.cli_updater)
    self.wsocket.connect()
def get_book_subject(book_id):
    """Return the list of subject names attached to *book_id*."""
    rows = DbService.execute_query(BOOK_SUBJECTS.format(book_id))
    return [row[0] for row in rows]
def get_book_formats(book_id):
    """Return a mapping of format mime-type to URL for *book_id*."""
    rows = DbService.execute_query(BOOK_FORMAT.format(book_id))
    return {row[0]: row[1] for row in rows}
def get_book_languages(book_id):
    """Return the list of language codes recorded for *book_id*."""
    rows = DbService.execute_query(BOOK_LANGUAGES.format(book_id))
    return [row[0] for row in rows]
def get_bookshelves(book_id):
    """Return the list of bookshelf names that include *book_id*."""
    rows = DbService.execute_query(BOOKSHELVES.format(book_id))
    return [row[0] for row in rows]
def get_book_authors(book_id):
    """Return a list of author dicts (name, birth_year, death_year) for *book_id*.

    BUG FIX: the original created one dict *outside* the loop and appended the
    same object repeatedly, so every list entry aliased the last row's data.
    A fresh dict is now built per row.
    """
    query = BOOK_AUTHORS.format(book_id)
    data = DbService.execute_query(query)
    return [
        {
            "name": row[0],
            "birth_year": row[1],
            "death_year": row[2],
        }
        for row in data
    ]
def resume(request):
    """Render the resume-edit page with recommendations for one resume."""
    parser = HhApiParser()
    access_token = HhUser.objects.get(user_id=request.user.id).access_token
    username = get_user_name(request)
    resume_id = request.GET.get("resume_id", "")
    top_resumes = get_top_resumes(request)
    info = parser.get_applicant_resume_data(resume_id, access_token)

    # Available currency codes from the local reference table.
    db_service = DbService()
    currencies = [
        row[0]
        for row in db_service.execute_script('select code from currency')
    ]

    # Collect resume recommendations, ordered by their 'type' field.
    warnings = get_recommendations(info, request)
    warnings.sort(key=operator.itemgetter('type'))

    # Reformat ISO experience dates as "<month name> <year>".
    for job in info['experience']:
        started = datetime.datetime.fromisoformat(job['start'])
        job['start'] = started.strftime(f'{MONTH_LIST[started.month - 1]} %Y')
        if job['end'] is not None:
            finished = datetime.datetime.fromisoformat(job['end'])
            job['end'] = finished.strftime(
                f'{MONTH_LIST[finished.month - 1]} %Y')

    return render(
        request, 'main/resume_edit.html', {
            'active_el': resume_id,
            'user': username,
            'resume': info,
            'warnings': warnings,
            'top_resumes': top_resumes,
            'currencies': currencies,
        })
def stats_n_days(self, n):
    """Group codiusd pod records from the last *n* days by calendar date and
    compute per-day income and pod count.

    Returns a list of {'date', 'income', 'count'} dicts, one per day.
    Fixed the `dialy` typo and the element-by-element copy loop (now a plain
    ``list(grp)``).
    """
    daily = []
    with DbService() as db_service:
        pods_n_days = db_service.get_codiusd_in_n_days(n)
        # NOTE(review): groupby assumes rows arrive ordered by timestamp —
        # confirm get_codiusd_in_n_days guarantees that.
        for dt, grp in itertools.groupby(pods_n_days,
                                         key=lambda x: x[0].date()):
            income, count = calc_income(list(grp))
            daily.append({'date': dt, 'income': income, 'count': count})
    return daily
def get_book_count():
    """Return the total number of books, or None when the query has no rows."""
    rows = DbService.execute_query(BOOK_COUNT)
    first_row = next(iter(rows), None)
    if first_row is None:
        return None
    return int(first_row[0])
from services.db_service import DbService
from data_parsers.hhResumeParser import HhResumeParser
import threading
from tqdm import tqdm
import json

# Fetch the specialization ids for professional area '1' from the local DB.
service = DbService()
data = service.execute_script(
    "SELECT id FROM specialization WHERE profarea_id = '1'")
specs = [item[0] for item in data]
del data

# Two parser instances, one per worker thread — presumably so the threads do
# not share parser state; confirm HhResumeParser is not thread-safe.
parser_1 = HhResumeParser()
parser_2 = HhResumeParser()
# resumes_data[0] / resumes_data[1] collect results from thread 1 / thread 2.
resumes_data = [[], []]
# Process specializations in pairs, one thread per specialization.
# NOTE(review): range(0, len(specs) - 1, 2) skips the final specialization
# when len(specs) is odd — confirm this is intended.
for i in tqdm(range(0, len(specs) - 1, 2)):
    thread_1 = threading.Thread(
        target=lambda s: resumes_data[0].extend(parser_1.get_resumes(s)),
        args=(specs[i], ))
    thread_2 = threading.Thread(
        target=lambda s: resumes_data[1].extend(parser_2.get_resumes(s)),
        args=(specs[i + 1], ))
    threads = [thread_1, thread_2]
    for t in threads:
        t.start()
    # Wait for both fetches before starting the next pair.
    for t in threads:
        t.join()
from services.db_service import DbService

# Apply the hh.ru database schema backup (2021-05-14) to the local database.
db_service = DbService()
with open('..//data/hh_ru_backup_14052021_schema') as file:
    db_service.execute_file_script(file)

# Delete the used objects (translated from the original Russian comment).
del db_service
del DbService
from data_parsers.hhApiParser import HhApiParser
from services.db_service import DbService

parser = HhApiParser()
db_service = DbService()

# Fetch the "dictionaries" reference data from the HH API and persist each
# section into its own table, in the same order as before.
data = parser.get_dictionaries()
for section, writer in (
        ('schedule', db_service.add_to_schedule_table),
        ('experience', db_service.add_to_experience_table),
        ('currency', db_service.add_to_currency_table),
        ('employment', db_service.add_to_employment_table),
        ('employer_type', db_service.add_to_employer_type_table)):
    writer(data.get(section))

# Fetch the "specializations" reference data and persist it as well.
data = parser.get_specializations_dict()
db_service.add_to_specialization_table(data)

# Drop the used objects.
del parser
del db_service
del data
from services.db_service import DbService from scipy.sparse import lil_matrix from sklearn.preprocessing import normalize from scipy.sparse import spdiags from scipy.sparse import vstack import numpy as np import json db_service = DbService() # Получение вакансий vacancies_id = db_service.execute_script(""" SELECT v.id FROM vacancies as v INNER JOIN vacancy_skill as v_s ON v.id = v_s.vacancy_id GROUP BY v.id HAVING count(v_s.vacancy_id) > 2 """) vacancy_to_col = {} for col_id, (vacancy_id, ) in enumerate(vacancies_id): vacancy_to_col[vacancy_id] = col_id # Получение ключевых навыков skills = db_service.execute_script(""" SELECT DISTINCT skill_name FROM vacancy_skill GROUP BY skill_name HAVING count(vacancy_id) > 25
def cleanup_db_check(self, days=180):
    """Purge database records older than *days*.

    days: retention window in days; defaults to the previously hard-coded
          180 so existing callers are unaffected.
    """
    with DbService() as db_service:
        db_service.cleanup_old_records(days)
# Deduplicate resumes by id and normalize each record before loading them.
# BUG FIX: the original removed items from `resumes` while iterating over it,
# which makes the list iterator skip the element following every removal;
# we now iterate over a shallow copy. The seen-id container is a set instead
# of a list to avoid O(n^2) membership checks.
seen_ids = set()
for resume in list(resumes):
    if resume['id'] in seen_ids:
        resume['broken'] = True
        resumes.remove(resume)
        continue
    seen_ids.add(resume['id'])
    del resume['additional_education']
    # Collapse the experience entries into one total duration.
    resume['total_experience'] = 0
    for exp in resume['experience']:
        resume['total_experience'] += exp['time']
    del resume['experience']
    # Keep only second-level specializations (ids containing a dot).
    old_specs = resume['specializations']
    resume['specializations'] = [spec for spec in old_specs if '.' in spec]

# Keep only resumes that ended up with the full set of 12 fields.
normal_resumes = [r for r in resumes if len(r) == 12]

print('---> Запись')
db_service = DbService()
db_service.load_resumes(normal_resumes)
def load_vacancies(d_range):
    """Download vacancies published within ``d_range`` and persist them.

    d_range: (date_from, date_to) pair. Uses the module-level ``parser``
    (defined elsewhere in this file) and ``db_service`` objects.
    """
    params = {
        'page': 0,
        'per_page': 100,
        'date_from': d_range[0],
        'date_to': d_range[1],
        'specialization': 1
    }
    vac = parser.get_vacancies(req_params=params)
    db_service.save_vacancies(vac)
    # Release the fetched payload as soon as it is stored.
    vac.clear()


db_service = DbService()
THREADS_NUMBER = 6
# Fan out the date ranges across up to THREADS_NUMBER loader threads per batch.
for i in tqdm(range(0, len(dates), THREADS_NUMBER)):
    threads = []
    upper_i = i + THREADS_NUMBER
    if upper_i > len(dates):
        upper_i = len(dates)
    for j in range(i, upper_i):
        threads.append(
            threading.Thread(target=load_vacancies, args=(dates[j], )))
    for t in threads:
        t.start()
    # NOTE(review): the threads are started but never joined in this visible
    # chunk, so the next batch may begin before this one finishes — confirm a
    # join happens further down in the file.
import json from services.db_service import DbService import re with open('..//data/resume_data.json', 'r') as file: resume_list = json.load(file) db_service = DbService() db_specializations = db_service.execute_script( "SELECT id, name FROM specialization") bd_schedules_orig = db_service.execute_script("SELECT id, name FROM schedule") bd_schedules = {} for sch in bd_schedules_orig: bd_schedules[sch[1].lower()] = sch[0] del bd_schedules_orig bd_employment_orig = db_service.execute_script( "SELECT id, name FROM employment") bd_employment = {} for emp in bd_employment_orig: bd_employment[emp[1].lower()] = emp[0] del bd_employment_orig id_regex = re.compile(r'e/(.+?)\?') resume_copy = [resume for resume in resume_list] for resume in resume_copy: # Резюме без ссылки и названия не рассматриваем if (resume['href'] is None) or (resume['title'] is None):