Пример #1
0
def combinedFacilityList(data):
    """Merge facilities present in `data` into the active facility list.

    For every state/facility pair in `data` that is not already present in
    the active list (matched on state_name + facility_code), append a stub
    record built from the raw data. Returns the combined list.
    """
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    result = ModelFacility.getActiveFacility()
    for state_code, facilities in data.items():
        for facility_code in facilities:
            already_known = any(
                entry['state_name'] == state_code
                and entry['facility_code'] == facility_code
                for entry in result)
            # Logger.v('state_facility_exist', already_known, state_code, facility_code, facilities[facility_code]['name']);
            if not already_known:
                # NOTE(review): state_code is stored as both code and name —
                # presumably the raw feed only carries one identifier.
                result.append({
                    'state_code': state_code,
                    'state_name': state_code,
                    'ptj_code': '',
                    'ptj_name': '',
                    'facility_code': facility_code,
                    'facility_name': facilities[facility_code]['name'],
                    'facility_type': '',
                    'active': 'a',
                })
    return result
Пример #2
0
def getMissingDates(data):
	"""Compute, per report key, the dates that still need to be crawled.

	For each report key in `data`, groups the already-crawled dates by state
	(via groupDates) and, for every missing month, picks either today (if the
	month is still running) or the month's last day as the date to crawl.

	Returns: dict mapping report key -> list of date strings, newest first.
	"""
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	missing_dates = {};
	today = DateTime.now(tzinfo=msia_tz); # date only
	state_by = 'state_code';
	states = list(db['state'].find({},{'_id': 0, state_by: 1}));
	current_year = DateTime.getDateCategoryName(date=DateTime.now(tzinfo=msia_tz), element='year');
	for rk in data:
		row = data[rk];
		if rk not in missing_dates:
			missing_dates[rk] = [];

		# split row's dates into 'missing' vs 'crawled' buckets per state
		dates = groupDates(params={'states': states, 'state_by': state_by}, data=row);
		for date in dates['missing']:
			# last calendar day of the missing date's month
			end_date_of_month = DateTime.getDaysAgo(days_to_crawl=1, datefrom=DateTime.getNextMonth(DateTime.convertDateTimeFromString(date)));
			day_diff = DateTime.getDifferenceBetweenDuration([today, end_date_of_month]);

			# month not over yet -> crawl up to today; otherwise use month end
			if day_diff >= 0:
				date_str = DateTime.toString(today);
			else:
				date_str = DateTime.toString(end_date_of_month);

			if date_str not in dates['crawled']:
				missing_dates[rk].append(date_str);

			# Logger.v('day_diff', day_diff);
			# Logger.v('date', DateTime.getDaysAgo(days_to_crawl=1, datefrom=DateTime.getNextMonth(DateTime.convertDateTimeFromString(ed))));
		missing_dates[rk] = sorted(list(set(missing_dates[rk])), reverse=True);
	return missing_dates;
Пример #3
0
def update(data):
    """Queue one state/facility integrity row if not already seen this run.

    Deduplicates on the state+facility combination via the module-level
    `unique_facility` list, then flushes pending bulk operations for
    `collection_name`.
    """
    global msia_tz, column_keymap, collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    combo_key = '_'.join([str(data['state']), str(data['facility_code'])])
    if combo_key not in set(unique_facility):
        date_string = DateTime.toString(fn.getNestedElement(data, 'date'))
        record = {
            'state_name': fn.getNestedElement(data, 'state'),
            'state_code': fn.getNestedElement(data, 'state'),
            'facility_name': fn.getNestedElement(data, 'facility_name'),
            'facility_code': fn.getNestedElement(data, 'facility_code'),
            'state_updated_at': date_string,
            'facility_updated_at': date_string,
            'date': date_string,
        }
        dbManager.addBulkInsert(collection_name, record, batch=True)
        unique_facility.append(combo_key)
    dbManager.executeBulkOperations(collection_name)
Пример #4
0
def save(params, chunk, chunks_info):
	"""Insert one CSV chunk of stock-issue rows into mongo.

	For each row: lowercases it, derives approved_year_month from the
	approved_date, tags the upload date, queues a bulk insert, updates the
	integrity datalog and collects issue options. Flushes the bulk queue at
	the end and returns the (mutated) chunks_info.
	"""
	global collection_name, column_keymap;
	upload_date = fn.getNestedElement(params, 'date');
	data = File.readChunkData(chunk);
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	current_index = fn.getNestedElement(chunks_info, 'current', 0);
	total_index = fn.getNestedElement(chunks_info, 'total', len(data));

	total_length = len(data);
	queue_info = chunks_info['queue']
	# Logger.v('Running Index:', chunks_info['queue']['running']);
	chunks_info['queue']['current']+=1;
	# Logger.v('Saving from... {0}/{1}, current package: {2}'.format(current_index, total_index, total_length) );
	fn.printProgressBar(queue_info['current'], queue_info['total'], 'Processing Chunk Insertion');
	for idx in range(0, total_length):
		row = data[idx];
		# Logger.v('row', row);
		obj_ = transformToLowercase(row);
		# assumes approved_date is '<date> <time>'; keep the date part only
		date_only = obj_['approved_date'].split(' ')[0];
		# Logger.v('date_only', date_only);
		obj_.update({
			'approved_year_month': DateTime.getDateCategoryName(date=date_only, element='year_month_digit'),
			'upload_date': upload_date,
		});
		dbManager.addBulkInsert(collection_name, obj_, batch=True);
		ModelSIIntegrity.update(data=obj_);
		retrieveIssueOption(obj_);
	#ensure all data is save properly
	dbManager.executeBulkOperations(collection_name);
	return chunks_info;
Пример #5
0
def upload(params):
	"""End-to-end upload pipeline for stock-issue Excel files.

	Converts each uploaded Excel file to CSV, optionally resets the target
	collections once (first file only, when getPath says so), streams the CSV
	through save() in chunks, rebuilds indexes, persists issue options and
	finally triggers the completion callback.
	"""
	Debug = DebugManager.DebugManager();
	Debug.start();
	Debug.trace('start');
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	date = fn.getNestedElement(params, 'date');
	path = fn.getNestedElement(params, 'path');
	# url = fn.getNestedElement(params, 'callback_url'); # required params to handle callback_url
	paths, should_reset = ModelUpload.getPath(params);
	for idx in range(0, len(paths)):
		p = paths[idx];
		processed_filename = File.converExcelFileToCsv(p, ignore_index=True);
		Logger.v('processed_filename', processed_filename);
		Debug.trace('convert to json : path {0}'.format( processed_filename ) );
		if idx == 0 and should_reset: #reset once at the beginning
			Logger.v('Reset Database.');
			reset(date); #reset stock_issue collection
			ModelSIIntegrity.reset(date); #reset stock_issue_datalog by date given
		File.readCsvFileInChunks(processed_filename, save, params, chunksize=chunksize);
		Debug.trace('uploaded to mongo.');
	generateIndex();
	ModelSIIntegrity.generateIndex();
	Debug.trace('indexing mongo collection.');
	saveIssueOption();
	Debug.trace('save option to json.');
	trigger_params = copy.deepcopy(params);
	trigger_params['result'] = 'data count: {0}'.format(params['data_count'][path]);
	# Logger.v('trigger_params', trigger_params);
	dbManager.executeBulkOperations(None); # Insert all the remaining job at once.
	ReportStock.triggerOnComplete(trigger_params);
	Debug.trace('trigger api on complete.');
	Debug.end();
	Debug.show('Stock.upload');
Пример #6
0
def getBackdateList(params):
    """Collect the distinct dates available in the rolling stock collections.

    Samples one document from 'stock_latest' and each backdated
    'stock_1' .. 'stock_<date_retrieve_limit>' collection, and returns
    {'date': [...]} sorted newest first.
    """
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    found_dates = set()
    for offset in range(date_retrieve_limit + 1):  # 7 days backward + 1 today
        if offset == 0:
            name = 'stock_latest'
        else:
            name = 'stock_{0}'.format(offset)
        sample = list(db[name].find({}, {'_id': 0, 'date': 1}).limit(1))
        if sample:
            found_dates.add(DateTime.toString(sample[0]['date']))

    return {
        'date': sorted(found_dates, reverse=True)
    }
Пример #7
0
def save(params, chunk, chunks_info):
    """Insert one CSV chunk of stock rows into the latest stock collection.

    Each row is lowercased, stamped with the upload datetime, queued for bulk
    insert, and also fed to the integrity tracker and the item catalogue.
    Returns the (mutated) chunks_info.
    """
    global latest_collection_name, history_collection_name

    data = File.readChunkData(chunk)
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    current_index = fn.getNestedElement(chunks_info, 'current', 0)
    total_index = fn.getNestedElement(chunks_info, 'total', len(data))

    date = fn.getNestedElement(params, 'date')
    # NOTE: local name shadows any `datetime` module import in this file
    datetime = DateTime.convertDateTimeFromString(date)
    total_length = len(data)
    queue_info = chunks_info['queue']
    # Logger.v('Running Index:', chunks_info['queue']['running']);
    chunks_info['queue']['current'] += 1
    # Logger.v('Saving from... {0}/{1}, current package: {2}'.format(current_index, total_index, total_length) );
    fn.printProgressBar(queue_info['current'], queue_info['total'],
                        'Processing Chunk Insertion')
    for idx in range(0, total_length):
        # insert stock_latest
        row = data[idx]
        obj_ = transformToLowercase(data=row, datetime=datetime)
        ModelStockIntegrity.update(data=obj_)
        dbManager.addBulkInsert(latest_collection_name, obj_, batch=True)
        # dbManager.addBulkInsert(history_collection_name, obj_, batch=True); # temporary off (need 7 day data only)

        # insert items
        # d = data[idx];
        ModelItem.saveItem(row)
        # fn.printProgressBar(current_index+idx, total_index, 'Processing Item Insertion');

    #ensure all data is save properly
    # dbManager.executeBulkOperations(history_collection_name); # temporary off (need 7 day data only)
    dbManager.executeBulkOperations(latest_collection_name)
    return chunks_info
Пример #8
0
def get(params):
	"""Fetch stock-issue rows matching the filters in `params`.

	Filters: state (required; underscores become spaces), optional drug
	codes, requester group and issue type. A value of 'all' (or an empty
	value) for requester_group/issue_type disables that filter.

	Returns: list of documents without _id/inserted_at/updated_at.
	"""
	drug_codes = fn.getNestedElement(params, 'drug_nondrug_code', []);
	state = fn.getNestedElement(params, 'state');
	requester_group = fn.getNestedElement(params, 'requester_group');
	issue_type = fn.getNestedElement(params, 'issue_type');
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	match_query = {
		'state': state.replace('_', ' '),
	};
	if drug_codes:
		match_query['drug_nondrug_code'] = {'$in': drug_codes};

	if requester_group and requester_group != 'all':
		match_query['requester_group_name'] = requester_group.replace('_', ' ');

	if issue_type and issue_type != 'all':
		match_query['issue_type'] = issue_type.replace('_', ' ');

	data = list(db[collection_name].aggregate([
		{
			'$match': match_query,
		},
		{
			'$project': {'_id': 0, 'inserted_at': 0, 'updated_at': 0}
		}
	]));
	# Logger.v('data length', len(data), data);
	return data;
Пример #9
0
def refreshIsRequired(data, collection_name):
    """Return True when `collection_name` must be rebuilt from `data`.

    A refresh is required when the collection is empty, or when the number
    of stored rows whose `unique_value` matches the raw data differs from
    the total stored row count (mongo and the raw source have diverged).
    """
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    mongo_data = list(db[collection_name].find({}))
    # Logger.v('mongo_data', mongo_data);
    unique_values = [
        '_'.join(generateUniqueValue(data=row, collection_name=collection_name))
        for row in data
    ]
    matched_result = list(db[collection_name].find(
        {'unique_value': {
            '$in': unique_values
        }}))
    # Logger.v('matched_result', matched_result)
    if len(matched_result) != len(mongo_data) or not mongo_data:
        Logger.v('matched_result len', len(matched_result))
        Logger.v('mongo_data len', len(mongo_data))
        return True
    return False
Пример #10
0
def check(params):
    """Summarise stock-issue integrity over the last `date_retrieve_limit` days.

    Pulls integrity rows whose state/facility update dates fall inside the
    window, then folds them into per-state and per-state/per-facility
    aggregates via addIntegrityData.

    Returns: {'state': {...}, 'state_facility': {...}}.
    """
    global msia_tz, date_retrieve_limit, date_count, collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    today = DateTime.now(tzinfo=msia_tz)
    start_date = DateTime.getDaysAgo(date_retrieve_limit, datefrom=today)
    durations = DateTime.getBetween([start_date, today],
                                    element='date',
                                    offset=24)['order']
    # offset 24 to include today
    Logger.v('durations', durations)
    data = db[collection_name].aggregate([{
        '$match': {
            'state_updated_at': {
                '$in': durations
            },
            'facility_updated_at': {
                '$in': durations
            }
        }
    }, {
        '$project': {
            '_id': 0,
            'inserted_at': 0,
            'updated_at': 0
        }
    }])
    data = list(data)
    Logger.v('Total stock issue integrity in', date_retrieve_limit, 'days:',
             len(data))
    state_data = {}
    facility_data_by_state = {}

    for idx in range(0, len(data)):
        row = data[idx]
        state_code = fn.getNestedElement(row, 'state_code')
        if state_code not in facility_data_by_state:
            facility_data_by_state[state_code] = {}

        state_data = addIntegrityData(data={
            'row': row,
            'to_update': state_data
        },
                                      category='state')
        facility_data_by_state[state_code] = addIntegrityData(
            data={
                'row': row,
                'to_update': facility_data_by_state[state_code]
            },
            category='facility')

        # NOTE(review): date_count is never incremented inside this loop, so
        # this break looks unreachable unless the global starts above the
        # limit — confirm intent before relying on it.
        if date_count > date_retrieve_limit:  # limit loop data/ show data in N days
            break
        date_count = 0
        # reset to 0th day
    return {
        'state': state_data,
        'state_facility': facility_data_by_state,
    }
Пример #11
0
def checkAvailableMonth():
	"""Return the (up to) six most recent 'approved_year_month' values stored."""
	limit = 6;
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	rows = list(db[collection_name].find({}, {'_id': 0, 'approved_year_month': 1}));
	frame = pd.DataFrame(rows);
	# distinct months, newest first
	months = frame['approved_year_month'].sort_values(ascending=False).unique().tolist();
	return months[:limit];
Пример #12
0
def reset(date):
	"""Clear cached issue options and delete rows uploaded on `date`."""
	global collection_name, stock_issue_options;
	stock_issue_options = {};
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	db[collection_name].delete_many({
		'upload_date': date,
	});
Пример #13
0
def reset(date):
    """Forget tracked facilities and delete integrity rows for `date`."""
    global unique_facility
    unique_facility = []
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    db[collection_name].delete_many({
        'date': date,
    })
Пример #14
0
def generateIndex():
    """Create the indexes used when querying the integrity collection."""
    global collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    # newest-first lookups by update date
    db[collection_name].create_index([
        ('state_updated_at', -1),
        ('facility_updated_at', -1),
    ])
    # lookups by state/facility identity
    db[collection_name].create_index([
        ('state_name', 1),
        ('state_code', 1),
        ('facility_name', 1),
        ('facility_code', 1),
    ])
Пример #15
0
def generateIndex():
    """Create query and text-search indexes on both stock collections."""
    global latest_collection_name, history_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    for name in (latest_collection_name, history_collection_name):
        # compound index covering the usual filter fields, newest date first
        db[name].create_index([
            ('date', -1),
            ('item_code', 1),
            ('state', 1),
            ('facility_code', 1),
            ('requester_unit_code', 1),
        ])
        # free-text search over item descriptions
        db[name].create_index([('item_desc', TEXT)],
                              default_language='english')
Пример #16
0
def refreshCollection(data, collection_name):
    """Rebuild `collection_name` from scratch using the rows in `data`."""
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    db[collection_name].delete_many({})  # wipe before re-inserting everything
    for row in data:
        unique_value = generateUniqueValue(data=row,
                                           collection_name=collection_name)
        document = generateKeyValue(data=row)
        document['unique_value'] = '_'.join(unique_value)
        # Logger.v('document', document);
        dbManager.addBulkInsert(collection_name, document, batch=True)
    dbManager.executeBulkOperations(None)
Пример #17
0
def createSchedules(args=None):  #upid, page_type
    """Create crawl queue entries for each requested platform/page.

    Args:
        args: optional dict of schedule options (page_type, pages.<platform>,
            priority, crawl_comment). Defaults to an empty dict.

    Returns:
        Params.generate(True, {'pending_count': ..., 'incomplete_count': ...})
    """
    global filter_page_type
    if args is None:  # avoid the shared mutable-default-argument pitfall
        args = {}
    Debug = DebugManager.DebugManager()
    Debug.start()
    dbManager = SharedMemoryManager.getInstance()
    # crawl_duration = fn.getNestedElement(fn.config,'CRAWL_DURATION', 12);
    incomplete_task, incomplete_task_count = checkRemaining()
    new_queue_count = 0
    # Logger.v(incomplete_task_count, incomplete_task, filter_page_type);
    extra_params = {
        'crawl_comment': fn.getNestedElement(args, 'crawl_comment', None)
    }
    extra_params = {k: v for k, v in extra_params.items() if v is not None}

    for platform in filter_page_type:
        if args and platform not in fn.getNestedElement(
                args, 'page_type', platform).split(','):
            # skip when page_type is supplied and does not mention this platform
            Logger.v('Skip Platform:%s' % (platform))
            continue
        pages = fn.getNestedElement(args, 'pages.{0}'.format(platform), [])
        Logger.v('platform', platform)
        # Logger.v('page', args['pages']['budget']);
        for page in pages:  # create one queue entry per page
            # Logger.v('page', page);
            Queue.create(page,
                         extra_params=extra_params,
                         priority=fn.getNestedElement(args, 'priority',
                                                      'daily'),
                         batch=True)
            new_queue_count += 1
            Logger.v('new_queue_count', new_queue_count)
        # Debug.trace();

    Logger.v('Incomplete:%s, New Queue: %s' %
             (incomplete_task_count, new_queue_count))
    if incomplete_task_count > (new_queue_count *
                                int(fn.config['DEBUG_CRAWL_WARNING']) /
                                100) or incomplete_task_count > int(
                                    fn.config['DEBUG_CRAWL_WARNING']):
        # NOTE(review): alert mail is intentionally disabled; the threshold
        # check is kept so it can be re-enabled easily.
        # Mail.send('[%s]Incomplete Crawl [%s], Current Schedule: [%s]'%(DateTime.getReadableDate(DateTime.now()),
        # 	incomplete_task_count, new_queue_count),
        # 		 fn.dumps(incomplete_task, encode=False)
        # );
        pass

    result = {
        'pending_count': new_queue_count,
        'incomplete_count': incomplete_task_count
    }
    dbManager.executeBulkOperations(None)
    # Debug.show('Create Schedule');
    return Params.generate(True, result)
Пример #18
0
def accept_conn(s):
    """Accept connections on socket `s` forever, one handler thread each.

    Each accepted connection is handed to process() on a new thread together
    with the shared dbManager. Exceptions from accept/thread-start are logged
    and the loop continues.
    """
    sharedManager = SharedMemoryManager.Manager()
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query(index=False)
    # NOTE(review): dbManager is closed here but still passed to worker
    # threads below — presumably process() re-opens it; confirm.
    dbManager.close()
    while True:
        try:
            #accept and receive socket connection
            conn, addr = s.accept()
            _thread.start_new_thread(process, (conn, addr, dbManager))
        except Exception as ex:
            print(ex)
            traceback.print_exc()
Пример #19
0
def getPath(params):
    """Resolve which uploaded file paths should be processed for a request.

    Looks up the upload log for (date, collection group); if this
    part-of-the-day was already uploaded, returns all logged paths,
    otherwise just the new path. Also records the upload via updateLog().

    Returns:
        (paths, should_reset): list of file paths and whether the target
        collections should be reset before inserting.
    """
    global upload_log_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    date = fn.getNestedElement(params, 'date')
    path = fn.getNestedElement(params, 'path')
    group = fn.getNestedElement(params, 'group')
    data_part = fn.getNestedElement(params, 'data_part', 'default')
    if isinstance(group, str):
        group = group.lower()
    if isinstance(data_part, str):
        data_part = data_part.lower()

    query = {
        'date': date,
        'collection': group,
    }
    stock_upload_log = list(db[upload_log_collection_name].find(
        query, {
            '_id': 0,
            'inserted_at': 0,
            'updated_at': 0
        }))
    # Logger.v('stock_upload_log', stock_upload_log);
    part_of_the_day = []
    paths = []
    if stock_upload_log:
        part_of_the_day = stock_upload_log[0]['part_of_the_day']
        for part in part_of_the_day:
            paths.append(stock_upload_log[0][part])
    updateLog(params)
    should_reset = True
    if data_part not in part_of_the_day:
        # first upload for this part of the day: only process the new file;
        # reset only when nothing was uploaded earlier today
        should_reset = False if part_of_the_day else True
        paths = [path]
    Logger.v('Upload paths:', paths, should_reset)
    return paths, should_reset
Пример #20
0
def updateLog(params):
    """Upsert the upload-log entry for (date, collection group).

    Merges the already-logged parts of the day with the incoming
    `data_part`/`path`, triggering the stock backdate rotation on the first
    'stock' upload of the day, and queues an upsert of the log document.
    """
    global upload_log_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    date = fn.getNestedElement(params, 'date')
    path = fn.getNestedElement(params, 'path')
    group = fn.getNestedElement(params, 'group')
    data_part = fn.getNestedElement(params, 'data_part', 'default')
    if type(group) == str:
        group = group.lower()
    if type(data_part) == str:
        data_part = data_part.lower()
    query = {
        'date': date,
        'collection': group,
    }
    stock_upload_log = list(db[upload_log_collection_name].find(
        query, {
            '_id': 0,
            'inserted_at': 0,
            'updated_at': 0
        }))
    # Logger.v('stock_upload_log', stock_upload_log);
    # first stock upload of the day: rotate the backdated collections
    if not stock_upload_log and group == 'stock':
        # Logger.v('backdate collection');
        ModelStock.backdateCollection()

    # Logger.v('update upload_log collection');
    values = {}
    if stock_upload_log:
        if 'part_of_the_day' not in values:
            values['part_of_the_day'] = []

        # carry over every previously logged part and its path
        for part in stock_upload_log[0]['part_of_the_day']:
            # Logger.v('part', part);
            values['part_of_the_day'].append(part)
            values[part] = stock_upload_log[0][part]
        if data_part not in stock_upload_log[0]['part_of_the_day']:
            values['part_of_the_day'].append(data_part)
            values[data_part] = path

    else:
        # no log yet for today: start it with this part
        values['part_of_the_day'] = [data_part]
        values[data_part] = path
    # Logger.v('query', query, values)
    # exit();
    dbManager.addBulkUpdate(upload_log_collection_name,
                            query,
                            values,
                            upsert=True,
                            batch=False)
Пример #21
0
def checkEmpty(params):
	"""Audit crawled files for the past 12 months and report empty slots.

	Scans the crawl folder per report key and year, counts crawled
	date/state combinations into a pre-built template, and writes one Excel
	summary per report key. Uses/clears the module-level global_check_data
	accumulator (filled by openDir).
	"""
	global global_check_data;
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	custom_params = copy.deepcopy(params);
	report_keys = fn.getNestedElement(params, 'keys.report', ['procurement', 'budget']);
	interval = fn.getNestedElement(params, 'interval', 1);
	past_dates = DateTime.getPastDate(count=12, duration=interval); # check previous 12 month data
	year = Crawl.extractYear(data=past_dates[0]);
	first_date = past_dates[0][-1][0];
	last_date = past_dates[0][0][1];
	# Logger.v('first_date', first_date, 'last_date', last_date);
	state_by = 'state_code';
	states = list(db['state'].find({},{'_id': 0, state_by: 1}));
	result = {};
	datetime = DateTime.toString(DateTime.now(tzinfo=msia_tz), date_format='%Y-%m-%d-%H-%M-%S');

	custom_params['first_date'] = first_date;
	custom_params['last_date'] = last_date;
	custom_params['state_by'] = state_by;
	custom_params['states'] = states;
	# template: result[report_key][date][state] = 0, filled in below
	temp_result = generateTemplate(params=custom_params);

	for rk in report_keys:
		if rk not in global_check_data:
			global_check_data[rk] = [];

		for y in year:
			root_path = '{0}/{1}/year_{2}'.format(crawl_folder, rk, y);
			openDir(root_path, rk);  # fills global_check_data[rk] with 'date_state' entries
			for gcd in global_check_data[rk]:
				date = gcd.split('_')[0];
				state = gcd.split('_')[1];
				if DateTime.inrange(date, [first_date, last_date]):
					try:
						temp_result[rk][date][state] += 1;
					except Exception as e:
						# dates/states outside the template are ignored
						# Logger.v('Main.checkEmpty:', e);
						pass;

	for rk in temp_result:
		if rk not in result:
			result[rk] = [];
		for date in temp_result[rk]:
			result[rk].append(temp_result[rk][date]);

		filename = '{0}/{1}_check_moh_empty'.format(test_folder, rk);
		# filename = 'tests/{0}_{1}_check_moh_empty'.format(rk, datetime);
		fn.writeExcelFile(filename=filename, data=result[rk]);
	global_check_data = {};
	return result;
Пример #22
0
def retrieveOption(collection_name='state', show_keys=None, hide_keys=None):
    """Fetch documents from `collection_name` with an optional projection.

    Args:
        collection_name: mongo collection to read (default 'state').
        show_keys: field names to include in the projection.
        hide_keys: field names to exclude from the projection.

    Returns:
        List of matching documents.
    """
    # None sentinels instead of mutable [] defaults (shared-state pitfall)
    show_keys = show_keys if show_keys is not None else []
    hide_keys = hide_keys if hide_keys is not None else []
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    return_keys = {}
    for key in show_keys:
        return_keys.update({key: 1})
    for key in hide_keys:
        return_keys.update({key: 0})

    # Logger.v('return_keys', return_keys);
    if return_keys:
        result = list(db[collection_name].find({}, return_keys))
    else:
        result = list(db[collection_name].find({}))

    return result
Пример #23
0
def get(params):
    """Return stock rows from the currently-selected latest collection."""
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()

    getCollectionName(params)  # side effect: repoints latest_collection_name
    query = getQuery(params)
    Logger.v('Get data from collection:', latest_collection_name)
    pipeline = [
        {'$match': query},
        {'$project': {'_id': 0, 'inserted_at': 0, 'updated_at': 0}},
    ]
    data = list(db[latest_collection_name].aggregate(pipeline))
    Logger.v('data length', len(data))
    return data
Пример #24
0
def getCollectionName(params):
    """Point `latest_collection_name` at the collection for params['date'].

    Defaults to 'stock_latest'; when the requested date is older than the
    latest stored date, switches to the backdated 'stock_<day_diff>'.
    """
    global latest_collection_name
    latest_collection_name = 'stock_latest'  # default
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    sample = list(db[latest_collection_name].find({}, {
        '_id': 0,
        'date': 1
    }).limit(1))
    if not sample:
        return
    latest_date = DateTime.convertDateTimeFromString(
        DateTime.toString(sample[0]['date']))
    date_string = fn.getNestedElement(params, 'date', None)
    if not date_string:
        return
    requested_date = DateTime.convertDateTimeFromString(date_string)
    delta = latest_date - requested_date
    day_diff = math.floor(delta.total_seconds() / float(86400))
    if day_diff > 0:
        latest_collection_name = 'stock_{0}'.format(day_diff)
Пример #25
0
def backdateCollection(days=date_retrieve_limit):
    """Rotate the rolling stock collections back by one day.

    Walks from the oldest slot down: drops stock_<days>, then renames each
    stock_<n-1> to stock_<n>, with stock_latest feeding stock_1. Finally
    recreates an empty stock_latest.

    NOTE: the default for `days` is bound once at import time from
    date_retrieve_limit; later changes to that global won't affect it.
    """
    global latest_collection_name
    dbManager = SharedMemoryManager.getInstance()
    for idx in range(days, 0, -1):
        # refresh the name list each pass — renames below change it
        collection_names = dbManager.getCollectionNames()
        col_name = 'stock_{0}'.format(idx)
        if idx > 1:
            previous_col_name = 'stock_{0}'.format(idx - 1)
        else:
            previous_col_name = latest_collection_name

        if col_name in collection_names:
            dbManager.dropCollection(col_name)

        if previous_col_name in collection_names:
            Logger.v('rename', previous_col_name, 'to', col_name)
            dbManager.renameCollection(previous_col_name, col_name)
        else:
            Logger.v('create', col_name)
            dbManager.createCollection(col_name)
    dbManager.createCollection(latest_collection_name)
Пример #26
0
def getData(params):
    """Load report rows for the report named by `params`.

    Builds the mongo query from `params` and returns at most `limit_data`
    documents (without _id) from the report's collection.
    """
    report_name = Report.getReportName(params)
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    limit_data = 5000
    # 5000 (used 30+- second)
    query = generateQuery(params=params)
    # Logger.v('query', query);

    # let mongo apply the cap instead of materialising the full result set
    # and slicing it in Python
    data = list(db[report_name].find(query, {'_id': 0}).limit(limit_data))

    Logger.v('data length', len(data))
    return data
Пример #27
0
def generateCrawlParam(params):
    """Build the crawl parameter sets for the budget/procurement crawlers.

    For each report key, combines the relevant years/date ranges with every
    state code (and optionally every facility code, capped by
    limit_for_test) into unique request descriptors (upid + url + dates).
    Also dumps each key's parameter list to an Excel file for inspection.

    Returns: dict mapping report key -> list of crawl parameter dicts.
    """
    Debug = DebugManager.DebugManager()
    Debug.start()
    global pass_month_quantity
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    crawl_params = {}
    limit_for_test = 10
    report_keys = fn.getNestedElement(params, 'keys.report',
                                      ['budget', 'procurement'])
    interval = fn.getNestedElement(params, 'interval', 1)
    filter_facility_code = fn.getNestedElement(params, 'filter.facility_code',
                                               True)
    check_empty = fn.getNestedElement(params, 'schedule_params.check_empty',
                                      False)
    today = fn.getNestedElement(
        params, 'schedule_params.today',
        DateTime.toString(DateTime.now(tzinfo=msia_tz)))
    # Logger.v('filter_facility_code', filter_facility_code);
    if check_empty:
        # past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval);
        past_dates = DateTime.getPastDate(
            count=pass_month_quantity,
            duration=interval,
            end=DateTime.convertDateTimeFromString(today))
        # Logger.v('past_dates', past_dates);
        # exit();
    else:
        past_dates = DateTime.getPastDate(count=pass_month_quantity,
                                          duration=interval)

    # Logger.v('past_dates', past_dates);
    state_codes = retrieveOption(collection_name='state',
                                 show_keys=['state_code'],
                                 hide_keys=['_id'])
    state_code = extractListByKey(data=state_codes, key='state_code')
    facility_codes = retrieveOption(collection_name='facility',
                                    show_keys=['facility_code'],
                                    hide_keys=['_id'])
    facility_code = extractListByKey(data=facility_codes, key='facility_code')
    for key in report_keys:
        # Logger.v('collection', key, past_dates[0]);
        Debug.trace()
        if key not in crawl_params:
            crawl_params[key] = []
        mongo_data = list(db[key].find({}, {}))

        # empty collection -> crawl the full history; otherwise only the
        # most recent duration
        if len(mongo_data) == 0:
            dates = past_dates[0][:]
        else:
            dates = past_dates[0][:1]

        year = extractYear(data=dates)
        # Logger.v('year', year);
        # Logger.v('filter_facility_code', filter_facility_code);
        if key == 'budget':
            # budget requests are keyed by financial year + state
            # (+ facility when filter_facility_code is set)
            if not filter_facility_code:
                iteration = 0
                total = len(year) * len(state_code)
                # fn.printProgressBar(iteration=iteration, total=total);
                for y in year:
                    for sc in state_code:
                        obj_ = {
                            'financial_year': y,
                            'state_code': sc,
                            'page_type': key,
                            'upid': '_'.join([sc, y]),
                            'url': api_links[key].format(sc, y, ''),
                            'start_date': today,
                            'end_date': today,
                        }
                        if obj_ not in crawl_params[key]:
                            crawl_params[key].append(obj_)
                            # Logger.v('len(crawl_param])', len(crawl_params[key]));
                        iteration += 1
                        # fn.printProgressBar(iteration=iteration, total=total);
            else:
                iteration = 0
                total = len(year) * len(state_code) * len(
                    facility_code[:limit_for_test])
                # fn.printProgressBar(iteration=iteration, total=total);
                for y in year:
                    for sc in state_code:
                        for fc in facility_code[:limit_for_test]:
                            obj_ = {
                                'financial_year': y,
                                'state_code': sc,
                                'page_type': key,
                                'upid': '_'.join([sc, y, fc]),
                                'facility_code': fc,
                                'url': api_links[key].format(sc, y, fc),
                                'start_date': today,
                                'end_date': today,
                            }
                            if obj_ not in crawl_params[key]:
                                crawl_params[key].append(obj_)
                                # Logger.v('len(crawl_param])', len(crawl_params[key]));
                            iteration += 1
                            # fn.printProgressBar(iteration=iteration, total=total);

        elif key == 'procurement':
            # procurement requests are keyed by a padded date range + state
            # (+ facility when filter_facility_code is set)
            if not filter_facility_code:
                for past_duration in dates:
                    start_date = DateTime.toString(
                        DateTime.getDaysAgo(days_to_crawl=-1,
                                            datefrom=past_duration[0]))
                    end_date = DateTime.toString(
                        DateTime.getDaysAgo(days_to_crawl=1,
                                            datefrom=past_duration[1]))
                    for sc in state_code:
                        obj_ = {
                            'state_code':
                            sc,
                            'start_date':
                            start_date,
                            'end_date':
                            end_date,
                            'page_type':
                            key,
                            'upid':
                            '_'.join([sc, start_date, end_date]),
                            'url':
                            api_links[key].format(sc,
                                                  start_date.replace('-', ''),
                                                  end_date.replace('-', ''),
                                                  ''),
                        }

                        if obj_ not in crawl_params[key]:
                            crawl_params[key].append(obj_)
                            # Logger.v('len(crawl_param])', len(crawl_params[key]));
            else:
                for past_duration in dates:
                    start_date = DateTime.toString(
                        DateTime.getDaysAgo(days_to_crawl=-1,
                                            datefrom=past_duration[0]))
                    end_date = DateTime.toString(
                        DateTime.getDaysAgo(days_to_crawl=1,
                                            datefrom=past_duration[1]))
                    for sc in state_code:
                        for fc in facility_code[:limit_for_test]:
                            obj_ = {
                                'state_code':
                                sc,
                                'start_date':
                                start_date,
                                'end_date':
                                end_date,
                                'page_type':
                                key,
                                'facility_code':
                                fc,
                                'upid':
                                '_'.join([sc, start_date, end_date, fc]),
                                'url':
                                api_links[key].format(
                                    sc, start_date.replace('-', ''),
                                    end_date.replace('-', ''), fc)
                            }
                            if obj_ not in crawl_params[key]:
                                crawl_params[key].append(obj_)
                                # Logger.v('len(crawl_param])', len(crawl_params[key]));

    for c in crawl_params:
        # Logger.v('crawl_params', c, len(crawl_params[c]));
        fn.writeExcelFile(filename='{0}/{1}'.format(test_folder, c),
                          data=crawl_params[c])
    Logger.v('crawl_params', len(crawl_params))
    Debug.show('Generate Crawl Params')
    return crawl_params
Пример #28
0
def getDropdownList(params):
	"""Build the dropdown option lists used by the UI filters.

	Reads the state/ptj/facility/facility_type collections from MongoDB
	(falling back to the latest cached JSON file when the query raises),
	filters each list by the caller's access rights, then appends the
	static duration/year/procurement_type/budget_type option lists.
	Returns the combined structure wrapped by Params.generate.
	"""
	dbManager = SharedMemoryManager.getInstance()
	db = dbManager.query()
	query_params = copy.deepcopy(params)
	dropdown = {}

	for collection in ['state', 'ptj', 'facility', 'facility_type']:
		query_params['key'] = collection
		# Prefer the live MongoDB collection; on failure, log and fall
		# back to the newest JSON snapshot on disk.
		try:
			rows = list(db[collection].find({}, {'_id': 0}))
		except Exception as ex:
			Logger.v(ex)
			rows = File.readLatestFile(directory='/'.join([main_folder, collection]), extension='.json')

		visible_rows = getAccessibleData(params=query_params, data=rows)
		dropdown[collection] = organiseStructure(data=visible_rows, key=collection)

	# Static option lists (not stored in the database).
	dropdown['duration'] = [
		{'id': 'yearly', 'name': 'Yearly'},
		{'id': 'monthly', 'name': 'Monthly'},
	]
	dropdown['year'] = [
		{'id': 2020, 'name': '2020'},
		{'id': 2019, 'name': '2019'},
	]
	dropdown['procurement_type'] = [
		{'id': 'type1', 'name': 'Type 1'},
		{'id': 'type2', 'name': 'Type 2'},
	]
	dropdown['budget_type'] = [
		{'id': 'db', 'name': 'Dasar Baru'},
		{'id': 'oo', 'name': 'One Off'},
	]
	return Params.generate(True, dropdown)
Example #29
0
def getIntegrity(params, data):
    """Compute per-key data-integrity values over the given durations.

    For every entry produced by getFacilityByState, builds a row whose
    'data' list holds one {date: value} pair per duration. Per the
    original inline note: a negative value means missing, 0 means
    complete, and a positive value means records not found in the
    user-uploaded facility list. When 'filter_key' is absent, recurses
    one level per key to fill the nested 'facility' breakdown, appending
    every row into data['to_update'] (which is also returned).
    """
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    check_data = fn.getNestedElement(data, 'check_data')
    facility = ModelFacility.getActiveFacility()
    filter_key = fn.getNestedElement(params, 'filter_key')
    durations = fn.getNestedElement(params, 'durations')
    result = fn.getNestedElement(data, 'to_update')
    state_data = fn.getNestedElement(data, 'state')
    facility_data_by_state = fn.getNestedElement(data, 'facility')
    data_list = getFacilityByState(params=params, data=check_data)

    for key in data_list:
        row = fn.getNestedElement(data_list, key)
        count = getTotalCount(
            params={'filter_key': filter_key, 'key': key},
            data={'row': row, 'facility': facility},
        )
        entry = {
            'id': fn.convertToSnakecase(fn.getNestedElement(row, 'id')),
            'name': fn.getNestedElement(row, 'name'),
            'code': fn.getNestedElement(row, 'code'),
            'data': [],
        }
        # Walk the durations newest-to-oldest; each point is keyed by the
        # day before the duration date.
        for position in reversed(range(len(durations))):
            date = durations[position]
            previous_date = DateTime.toString(
                DateTime.getDaysAgo(1, datefrom=date))
            if filter_key:
                date_count = fn.getNestedElement(
                    facility_data_by_state,
                    '{0}.{1}.{2}'.format(filter_key, key, date), 0)
                if not date_count:
                    date_count = 0
                # negative value is missing, 0 mean complete, positive
                # value is not found from user upload facility
                delta = date_count - count
            else:
                # do not include those positive, count missing facility
                # quantity only
                delta = 0
            entry['data'].append({previous_date: delta})
        if not filter_key:
            # Recurse one level down for this key; the recursion appends
            # its rows into the fresh list passed as 'to_update' and
            # returns that same list.
            entry['facility'] = getIntegrity(
                params={'filter_key': key, 'durations': durations},
                data={
                    'state': state_data,
                    'facility': facility_data_by_state,
                    'to_update': [],
                    'check_data': check_data,
                },
            )
        result.append(entry)

    return result
Example #30
0
def generateIndex():
	"""Create a compound MongoDB index on the collection named by the
	module-level ``collection_name`` over the fields
	approved_year_month (desc), drug_nondrug_code, state_name,
	facility_code and requester_group_name (all asc)."""
	global collection_name;
	dbManager = SharedMemoryManager.getInstance();
	db = dbManager.query();
	# create_index is idempotent in pymongo: re-running is a no-op if the
	# index already exists with the same key specification.
	db[collection_name].create_index([('approved_year_month', -1), ('drug_nondrug_code', 1), ('state_name', 1), ('facility_code', 1), ('requester_group_name', 1)]);