def test_read_one(line_name):
    """Print the yearly state-examination records for one line as a table.

    :param line_name: unicode name of the power line to look up.
    Side effect only: writes the header and one row per record to stdout.
    """
    records = get_state_examination_data_by_line_name(line_name)
    print(enc1(line_name))
    # Header names match the 10 columns emitted per row below.
    print('check_year unit_1 unit_2 unit_3 unit_4 unit_5 unit_6 unit_7 unit_8 line_state')
    # Original used the Python 2 print statement; print() emits the same
    # text and matches the rest of the file's style.
    row_fmt = ' %d %05s %05s %05s %05s %05s %05s %05s %05s %05s'
    for rec in records:
        print(row_fmt % (rec['check_year'],
                         rec['unit_1'], rec['unit_2'], rec['unit_3'],
                         rec['unit_4'], rec['unit_5'], rec['unit_6'],
                         rec['unit_7'], rec['unit_8'], rec['line_state']))
def generate_unit_probability(db):
    """Rebuild the 'bayesian_nodes' collection from state-examination data.

    For every distinct line_name in 'state_examination', computes the
    per-unit conditional probability tables and stores one document per
    unit node (name, display name, description taken from the grading
    standard JSON, conditions, domains).

    :param db: pymongo Database holding the collections.
    Side effects: drops and re-populates 'bayesian_nodes'.
    """
    def get_domains(cond_list):
        # Domain values are the keys of the first conditional entry;
        # OrderedDict preserves their original order.
        return OrderedDict(cond_list[0][1]).keys()

    def get_desc(std_list, display_name):
        # Collect the names of all standard items whose parent is this unit.
        parts = [item['name'] for item in std_list if item['parent'] == display_name]
        return ';\n'.join(parts)

    if 'bayesian_nodes' in db.collection_names(False):
        db.drop_collection('bayesian_nodes')

    # Grading-standard definitions used for the node descriptions.
    # NOTE(review): u'' replaces the Py2-only ur'' prefix; value unchanged.
    path = u'jiakongztpj.json'
    with open(path) as f:
        std = json.load(f)

    collection = get_collection('state_examination')
    # One aggregation group per distinct line name.
    pipeline = [
        {"$group": {"_id": "$line_name", "count": {"$sum": 1}}},
    ]
    lines = list(collection.aggregate(pipeline))
    linenames = [grp['_id'] for grp in lines]

    nodes = []
    for line_name in linenames:
        data = get_state_examination_data_by_line_name(line_name)
        print('%s:%d' % (enc1(line_name), len(data)))
        prob = calc_probability_unit(data)
        for unit in prob.keys():
            nodes.append({
                'name': unit,
                'display_name': UNIT_NAME_MAPPING[unit],
                'line_name': line_name,
                'description': get_desc(std, UNIT_NAME_MAPPING[unit]),
                'conditions': prob[unit],
                'domains': get_domains(prob[unit]),
            })

    collection = get_collection('bayesian_nodes')
    for node in nodes:
        collection.save(node)
def test_find_abnormal():
    """Scan 'state_examination' for malformed records and report them.

    Prints the line name and check year for: unit_N or line_state values
    containing an embedded space, line names containing '-', and line
    names not ending in u'\u7ebf' ("line").

    :return: list of _ids of the records whose line_name is malformed
        (previously collected but discarded).
    """
    collection = get_collection('state_examination')
    bad_ids = []
    for rec in collection.find({}):
        # A space inside a unit grade means the source cell was mis-parsed.
        for j in range(1, 9):
            key = 'unit_%d' % j
            if key in rec and ' ' in rec[key]:
                print('%s:%d' % (enc1(rec['line_name']), rec['check_year']))
        if 'line_state' in rec and ' ' in rec['line_state']:
            print('line_state%s:%d' % (enc1(rec['line_name']), rec['check_year']))
        if '-' in rec['line_name']:
            print('%s:%d' % (enc1(rec['line_name']), rec['check_year']))
            bad_ids.append(rec['_id'])
        if rec['line_name'][-1] != u'线':
            print('%s:%d' % (enc1(rec['line_name']), rec['check_year']))
            bad_ids.append(rec['_id'])
    return bad_ids
def test_calc_past_year(past_years_list=None):
    """Build a Bayesian network per line over the given check years and
    record the line-state posterior probabilities.

    For every line with two or more examination years, assembles the
    conditional tables, builds a belief network, queries line_state
    II/III/IV posteriors, and dumps everything to a UTF-8-sig JSON file
    on the d: drive (2010_2014.json).

    :param past_years_list: optional list of check years to include;
        defaults to an empty list ("no filter"), as before.
    """
    if past_years_list is None:
        # None sentinel instead of a shared mutable [] default argument.
        past_years_list = []

    def get_domains(cond_list):
        return OrderedDict(cond_list[0][1]).keys()

    def convert_tuple(adict):
        # Query results are keyed by (variable, value) tuples; flatten
        # to 'variable:value' strings so json.dumps can serialise them.
        ret = {}
        for k in adict.keys():
            ret[':'.join([k[0], k[1]])] = adict[k]
        return ret

    collection = get_collection('state_examination')
    pipeline = [
        {"$group": {"_id": "$line_name", "count": {"$sum": 1}}},
    ]
    lines = list(collection.aggregate(pipeline))
    linenames = [grp['_id'] for grp in lines]

    results = []
    for line_name in linenames:
        data = get_state_examination_data_by_line_name(line_name, past_years_list)
        check_years = [rec['check_year'] for rec in data]
        print('%s:%d:%s' % (enc1(line_name), len(data), str(check_years)))
        if len(check_years) <= 1:
            # A single year cannot support conditional probabilities.
            print('need 2 years or more')
            continue
        prob = calc_probability_unit(data)
        result = {'line_name': line_name, 'check_year': check_years}
        nodes = []
        for unit in prob.keys():
            nodes.append({
                'name': unit,
                'conditions': prob[unit],
                'domains': get_domains(prob[unit]),
            })
        cond = {'line_state': []}
        cond['line_state'].extend(calc_probability_line()['line_state'])
        for node in nodes:
            cond[node['name']] = node['conditions']
        g = build_bbn_from_conditionals(cond)
        for state in ('II', 'III', 'IV'):
            result[state] = convert_tuple(g.query(line_state=state))
        results.append(result)

    # NOTE(review): u'' with an escaped backslash replaces the Py2-only
    # ur'' raw literal; the path value is unchanged.
    with codecs.open(u'd:\\2010_2014.json', 'w', 'utf-8-sig') as f:
        f.write(json.dumps(results, ensure_ascii=False, indent=4))
def test_find_abnormal():
    """Report malformed 'state_examination' documents to stdout.

    Prints line name + check year for: unit grades or line_state values
    containing a stray space, line names containing '-', and line names
    whose last character is not u'\u7ebf'.

    :return: _ids of the records with a bad line_name (the original
        built this list but never used it).

    NOTE(review): this duplicates an earlier definition of the same name
    in this file; the later definition wins at import time.
    """
    unit_keys = ['unit_%d' % j for j in range(1, 9)]
    suspect_ids = []
    for doc in get_collection('state_examination').find({}):
        label = '%s:%d' % (enc1(doc['line_name']), doc['check_year'])
        for key in unit_keys:
            if key in doc and ' ' in doc[key]:
                print(label)
        if 'line_state' in doc and ' ' in doc['line_state']:
            print('line_state' + label)
        if '-' in doc['line_name']:
            print(label)
            suspect_ids.append(doc['_id'])
        if doc['line_name'][-1] != u'线':
            print(label)
            suspect_ids.append(doc['_id'])
    return suspect_ids
def generate_unit_probability(db):
    """Recreate 'bayesian_nodes' from the state-examination history.

    One document is stored per (line, unit) pair, carrying the unit's
    conditional probability table, its value domains, and a description
    assembled from the grading-standard file jiakongztpj.json.

    :param db: pymongo Database object.
    Side effects: drops any existing 'bayesian_nodes' collection first.

    NOTE(review): this duplicates an earlier definition of the same name
    in this file; the later definition wins at import time.
    """
    def domains_of(conditions):
        # The value domain is given by the keys of the first entry.
        first = OrderedDict(conditions[0][1])
        return first.keys()

    def description_for(standard, display_name):
        parts = []
        for entry in standard:
            if entry['parent'] == display_name:
                parts.append(entry['name'])
        return ';\n'.join(parts)

    if 'bayesian_nodes' in db.collection_names(False):
        db.drop_collection('bayesian_nodes')

    # u'' replaces the Py2-only ur'' prefix; the value is identical.
    with open(u'jiakongztpj.json') as f:
        standard = json.load(f)

    source = get_collection('state_examination')
    groups = list(source.aggregate([
        {"$group": {"_id": "$line_name", "count": {"$sum": 1}}},
    ]))

    documents = []
    for group in groups:
        line_name = group['_id']
        data = get_state_examination_data_by_line_name(line_name)
        print('%s:%d' % (enc1(line_name), len(data)))
        tables = calc_probability_unit(data)
        for unit_key in tables.keys():
            display = UNIT_NAME_MAPPING[unit_key]
            documents.append({
                'name': unit_key,
                'display_name': display,
                'line_name': line_name,
                'description': description_for(standard, display),
                'conditions': tables[unit_key],
                'domains': domains_of(tables[unit_key]),
            })

    target = get_collection('bayesian_nodes')
    for doc in documents:
        target.save(doc)
def test_read_all_records():
    """List lines that were graded III or IV more than three times.

    Groups 'state_examination' records whose line_state is III or IV by
    line name, then prints each qualifying line with its count and the
    list of states.
    """
    collection = get_collection('state_examination')
    namesmap = {}
    for rec in collection.find({}):
        if rec.get('line_state') in ('III', 'IV'):
            # BUG FIX: the original if/else created the bucket on first
            # sight of a name but dropped that first record; setdefault
            # both creates the bucket and appends every match.
            namesmap.setdefault(rec['line_name'], []).append(rec)
    frequent = {}
    for name, recs in namesmap.items():
        if len(recs) > 3:
            frequent[name] = recs
    for name in frequent:
        print('%s:%d,%s' % (enc1(name), len(frequent[name]),
                            str([r['line_state'] for r in frequent[name]])))
def test_read_all_records():
    """Print every line graded III or IV on more than three occasions.

    Scans all 'state_examination' documents, buckets the III/IV ones by
    line name, then reports name, count and the list of states.

    NOTE(review): this duplicates an earlier definition of the same name
    in this file; the later definition wins at import time.
    """
    grouped = {}
    for doc in get_collection('state_examination').find({}):
        state = doc.get('line_state')
        if state == 'III' or state == 'IV':
            # Fixes the original if/else which silently discarded the
            # first III/IV record seen for each line name.
            bucket = grouped.setdefault(doc['line_name'], [])
            bucket.append(doc)
    repeated = dict((k, v) for k, v in grouped.items() if len(v) > 3)
    for name, docs in repeated.items():
        states = [d['line_state'] for d in docs]
        print('%s:%d,%s' % (enc1(name), len(docs), str(states)))
def test_calc_past_year(past_years_list=None):
    """Compute line-state posteriors per line over the given check years.

    For every line with at least two examination years, builds the unit
    conditional tables, assembles a Bayesian belief network, queries the
    probability of line_state II/III/IV, and dumps all results as
    UTF-8-sig JSON to the d: drive file 2010_2014.json.

    :param past_years_list: optional list of check years to restrict to;
        defaults to no restriction (empty list), matching the original.

    NOTE(review): this duplicates an earlier definition of the same name
    in this file; the later definition wins at import time.
    """
    # None sentinel instead of a shared mutable [] default argument.
    years_filter = [] if past_years_list is None else past_years_list

    def flatten_keys(tuple_keyed):
        # (variable, value) tuple keys -> 'variable:value' strings, so
        # json.dumps can serialise the mapping.
        out = {}
        for pair in tuple_keyed.keys():
            out[':'.join([pair[0], pair[1]])] = tuple_keyed[pair]
        return out

    grouping = [{"$group": {"_id": "$line_name", "count": {"$sum": 1}}}]
    line_groups = list(get_collection('state_examination').aggregate(grouping))

    all_results = []
    for group in line_groups:
        line_name = group['_id']
        data = get_state_examination_data_by_line_name(line_name, years_filter)
        years = [rec['check_year'] for rec in data]
        print('%s:%d:%s' % (enc1(line_name), len(data), str(years)))
        if len(years) < 2:
            # Conditional tables need at least two observation years.
            print('need 2 years or more')
            continue
        tables = calc_probability_unit(data)
        # line_state prior plus one conditional table per unit node.
        conditionals = {'line_state': list(calc_probability_line()['line_state'])}
        for unit_key in tables.keys():
            conditionals[unit_key] = tables[unit_key]
        network = build_bbn_from_conditionals(conditionals)
        entry = {'line_name': line_name, 'check_year': years}
        entry['II'] = flatten_keys(network.query(line_state='II'))
        entry['III'] = flatten_keys(network.query(line_state='III'))
        entry['IV'] = flatten_keys(network.query(line_state='IV'))
        all_results.append(entry)

    # u'' + escaped backslash replaces the Py2-only ur'' raw literal;
    # the path value is unchanged.
    with codecs.open(u'd:\\2010_2014.json', 'w', 'utf-8-sig') as f:
        f.write(json.dumps(all_results, ensure_ascii=False, indent=4))