def checkEmpty(params): global global_check_data; dbManager = SharedMemoryManager.getInstance(); db = dbManager.query(); custom_params = copy.deepcopy(params); report_keys = fn.getNestedElement(params, 'keys.report', ['procurement', 'budget']); interval = fn.getNestedElement(params, 'interval', 1); past_dates = DateTime.getPastDate(count=12, duration=interval); # check previous 12 month data year = Crawl.extractYear(data=past_dates[0]); first_date = past_dates[0][-1][0]; last_date = past_dates[0][0][1]; # Logger.v('first_date', first_date, 'last_date', last_date); state_by = 'state_code'; states = list(db['state'].find({},{'_id': 0, state_by: 1})); result = {}; datetime = DateTime.toString(DateTime.now(tzinfo=msia_tz), date_format='%Y-%m-%d-%H-%M-%S'); custom_params['first_date'] = first_date; custom_params['last_date'] = last_date; custom_params['state_by'] = state_by; custom_params['states'] = states; temp_result = generateTemplate(params=custom_params); for rk in report_keys: if rk not in global_check_data: global_check_data[rk] = []; for y in year: root_path = '{0}/{1}/year_{2}'.format(crawl_folder, rk, y); openDir(root_path, rk); for gcd in global_check_data[rk]: date = gcd.split('_')[0]; state = gcd.split('_')[1]; if DateTime.inrange(date, [first_date, last_date]): try: temp_result[rk][date][state] += 1; except Exception as e: # Logger.v('Main.checkEmpty:', e); pass; for rk in temp_result: if rk not in result: result[rk] = []; for date in temp_result[rk]: result[rk].append(temp_result[rk][date]); filename = '{0}/{1}_check_moh_empty'.format(test_folder, rk); # filename = 'tests/{0}_{1}_check_moh_empty'.format(rk, datetime); fn.writeExcelFile(filename=filename, data=result[rk]); global_check_data = {}; return result;
def generateCrawlParam(params): Debug = DebugManager.DebugManager() Debug.start() global pass_month_quantity dbManager = SharedMemoryManager.getInstance() db = dbManager.query() crawl_params = {} limit_for_test = 10 report_keys = fn.getNestedElement(params, 'keys.report', ['budget', 'procurement']) interval = fn.getNestedElement(params, 'interval', 1) filter_facility_code = fn.getNestedElement(params, 'filter.facility_code', True) check_empty = fn.getNestedElement(params, 'schedule_params.check_empty', False) today = fn.getNestedElement( params, 'schedule_params.today', DateTime.toString(DateTime.now(tzinfo=msia_tz))) # Logger.v('filter_facility_code', filter_facility_code); if check_empty: # past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval); past_dates = DateTime.getPastDate( count=pass_month_quantity, duration=interval, end=DateTime.convertDateTimeFromString(today)) # Logger.v('past_dates', past_dates); # exit(); else: past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval) # Logger.v('past_dates', past_dates); state_codes = retrieveOption(collection_name='state', show_keys=['state_code'], hide_keys=['_id']) state_code = extractListByKey(data=state_codes, key='state_code') facility_codes = retrieveOption(collection_name='facility', show_keys=['facility_code'], hide_keys=['_id']) facility_code = extractListByKey(data=facility_codes, key='facility_code') for key in report_keys: # Logger.v('collection', key, past_dates[0]); Debug.trace() if key not in crawl_params: crawl_params[key] = [] mongo_data = list(db[key].find({}, {})) if len(mongo_data) == 0: dates = past_dates[0][:] else: dates = past_dates[0][:1] year = extractYear(data=dates) # Logger.v('year', year); # Logger.v('filter_facility_code', filter_facility_code); if key == 'budget': if not filter_facility_code: iteration = 0 total = len(year) * len(state_code) # fn.printProgressBar(iteration=iteration, total=total); for y in year: for sc in state_code: obj_ = { 'financial_year': y, 'state_code': sc, 'page_type': key, 'upid': '_'.join([sc, y]), 'url': api_links[key].format(sc, y, ''), 'start_date': today, 'end_date': today, } if obj_ not in crawl_params[key]: crawl_params[key].append(obj_) # Logger.v('len(crawl_param])', len(crawl_params[key])); iteration += 1 # fn.printProgressBar(iteration=iteration, total=total); else: iteration = 0 total = len(year) * len(state_code) * len( facility_code[:limit_for_test]) # fn.printProgressBar(iteration=iteration, total=total); for y in year: for sc in state_code: for fc in facility_code[:limit_for_test]: obj_ = { 'financial_year': y, 'state_code': sc, 'page_type': key, 'upid': '_'.join([sc, y, fc]), 'facility_code': fc, 'url': api_links[key].format(sc, y, fc), 'start_date': today, 'end_date': today, } if obj_ not in crawl_params[key]: crawl_params[key].append(obj_) # Logger.v('len(crawl_param])', len(crawl_params[key])); iteration += 1 # fn.printProgressBar(iteration=iteration, total=total); elif key == 'procurement': if not filter_facility_code: for past_duration in dates: start_date = DateTime.toString( DateTime.getDaysAgo(days_to_crawl=-1, datefrom=past_duration[0])) end_date = DateTime.toString( DateTime.getDaysAgo(days_to_crawl=1, datefrom=past_duration[1])) for sc in state_code: obj_ = { 'state_code': sc, 'start_date': start_date, 'end_date': end_date, 'page_type': key, 'upid': '_'.join([sc, start_date, end_date]), 'url': api_links[key].format(sc, start_date.replace('-', ''), end_date.replace('-', ''), ''), } if obj_ not in crawl_params[key]: crawl_params[key].append(obj_) # Logger.v('len(crawl_param])', len(crawl_params[key])); else: for past_duration in dates: start_date = DateTime.toString( DateTime.getDaysAgo(days_to_crawl=-1, datefrom=past_duration[0])) end_date = DateTime.toString( DateTime.getDaysAgo(days_to_crawl=1, datefrom=past_duration[1])) for sc in state_code: for fc in facility_code[:limit_for_test]: obj_ = { 'state_code': sc, 'start_date': start_date, 'end_date': end_date, 'page_type': key, 'facility_code': fc, 'upid': '_'.join([sc, start_date, end_date, fc]), 'url': api_links[key].format( sc, start_date.replace('-', ''), end_date.replace('-', ''), fc) } if obj_ not in crawl_params[key]: crawl_params[key].append(obj_) # Logger.v('len(crawl_param])', len(crawl_params[key])); for c in crawl_params: # Logger.v('crawl_params', c, len(crawl_params[c])); fn.writeExcelFile(filename='{0}/{1}'.format(test_folder, c), data=crawl_params[c]) Logger.v('crawl_params', len(crawl_params)) Debug.show('Generate Crawl Params') return crawl_params