# Helper functions

# Simple two-argument addition helper.
adder = lambda x, y: x + y


def normalize_phrase(phrase):
    """Lower-case *phrase*; map NaN cells (empty CSV fields) to ''.

    Args:
        phrase: a str label, or the float NaN that pandas produces for
            empty cells in ``read_csv`` output.

    Returns:
        The lower-cased string, or '' for NaN.

    Raises:
        TypeError: if phrase is neither a str nor a NaN float.
    """
    # Was `type(phrase) == str`; isinstance is the idiomatic check and
    # also accepts str subclasses.
    if isinstance(phrase, str):
        return phrase.lower()
    if np.isnan(phrase):
        return ''
    # Was `assert False`, which is silently stripped under `python -O`,
    # letting bad input fall through and return None. Raise explicitly.
    raise TypeError('Unexpected phrase value: {0!r}'.format(phrase))


argparser.add_argument('-b', type=str, help='Target building name',
                       dest='building')
args = argparser.parse_args()
building = args.building

# Transform parsed labels used in Scrabble for Oracle.
df = pd.read_csv('{0}_rawmetadata.csv'.format(building))\
    .set_index('SourceIdentifier')
with open('{0}_char_label_dict.json'.format(building), 'r') as fp:
    char_label_dict = json.load(fp)
parse_dict = dict()
from jasonhelper import argparser

# Command-line options shared by the experiment scripts: source buildings,
# per-building sample counts, transfer target, and step/iteration counters.
# 'slist'/'ilist' are presumably jasonhelper's comma-separated list types --
# confirm against the jasonhelper module.
_ARG_SPECS = (
    ('-bl', 'slist', 'buildings'),
    ('-nl', 'ilist', 'sample_nums'),
    ('-t', str, 'target_building'),
    ('-step', int, 'step_num'),
    ('-iter', int, 'iter_num'),
)
for _flag, _type, _dest in _ARG_SPECS:
    argparser.add_argument(_flag, type=_type, dest=_dest)
# NOTE(review): the lines below are the tail of a function whose `def` is
# outside this view (presumably a per-point loop in parse_fullparsing);
# indentation is reconstructed from a collapsed source -- confirm.
    # Re-join the per-character parse rows into the raw point name and
    # store it as the point's VendorGivenName metadata.
    sentence = ''.join(
        [row[0] for row in fullparsing['VendorGivenName']])
    rawpoint.metadata = {'VendorGivenName': sentence}
    rawpoint.save()
    print('Finished adding full parsing')


# add tagsets
def parse_tagsets(building):
    """Load ground-truth tagsets for *building* and upsert them into
    LabeledMetadata, one record per srcid.

    Reads ../groundtruth/{building}_tagsets.json, keyed by srcid.
    """
    with open('../groundtruth/{0}_tagsets.json'.format(building), 'r') as fp:
        true_tagsets = json.load(fp)
    for srcid, tagsets in true_tagsets.items():
        # Upsert so re-running the script updates rather than duplicates.
        point = LabeledMetadata.objects(srcid=srcid, building=building)\
            .upsert_one(srcid=srcid, building=building)
        point.tagsets = tagsets
        # Pick the single "point" tagset out of the full tagset list;
        # fall back to the literal string 'none' when none is found.
        point_tagset = sel_point_tagset(tagsets)
        if not point_tagset:
            point_tagset = 'none'
        point.point_tagset = point_tagset
        point.save()


if __name__ == '__main__':
    argparser.add_argument('-b', type=str, dest='building', required=True)
    args = argparser.parse_args()
    building = args.building
    # NOTE(review): grouping reconstructed from a collapsed source; it is
    # ambiguous whether parse_fullparsing() was inside this `if` -- confirm.
    if building in CMU_BUILDINGS:
        parse_tagsets(building)
        parse_fullparsing(building, write_rawmetadata=True)
def remove_invalid_srcids(building):
    """Delete metadata records for srcids blacklisted for *building*.

    The blacklist lives in config/invalid_srcids.json, keyed by building
    name; buildings without an entry are left untouched.
    """
    with open('config/invalid_srcids.json', 'r') as fp:
        blacklist_by_building = json.load(fp)
    if building not in blacklist_by_building:
        return None
    bad_srcids = blacklist_by_building[building]
    print('{0} invalid points are removed.'.format(len(bad_srcids)))
    for bad_srcid in bad_srcids:
        # Purge the point from both the raw and the labeled collections.
        RawMetadata.objects(srcid=bad_srcid).delete()
        LabeledMetadata.objects(srcid=bad_srcid).delete()


if __name__ == '__main__':
    # NOTE(review): type='bool' is presumably a jasonhelper custom type
    # string (like 'slist'/'ilist') -- confirm.
    argparser.add_argument('-b', type=str, dest='building', required=True)
    argparser.add_argument('-top', type='bool', dest='topclass_flag',
                           default=False)
    # add raw metadata
    args = argparser.parse_args()
    building = args.building
    if building == 'uva_cse':
        # UVA has its own loader; nothing else to do for this building.
        load_uva_building(building)
        print('UVA CSE Done')
        sys.exit()
    elif building in UCB_BUILDINGS:
        extract_raw_ucb_labels()
    basedir = './groundtruth/'
# NOTE(review): the lines below are the tail of a function (a best-tagset
# selector over score_dict) whose `def` is outside this view; indentation
# is reconstructed from a collapsed source -- confirm.
    # Reject weak matches outright.
    if max_score < 0.75:
        return None
    # TODO: Selection mechanism needs to be more smart.
    # Currently mapping
    # {'pressure', 'filter', 'discharge', 'his', 'weatherPoint', 'air', 'sp'}
    # ->
    # "Discharge_Air"
    cand = ''
    # Among the top-scoring tagsets, prefer the one with the most tags
    # (the longest '_'-separated name).
    for tagset, score in score_dict.items():
        if score == max_score:
            if len(cand.split('_')) < len(tagset.split('_')):
                cand = tagset
    return cand


argparser.add_argument('-src', type=str, dest='srcfile',
                       help='Haystack instance in JSON or Zinc')
args = argparser.parse_args()

################ Init base information ##############
# Load Brick
brick_tag_g = rdflib.Graph()
brick_tag_g.parse('brick/BrickTag.ttl', format='turtle')
# Collect every (transitive) subclass of BrickFrame#Tag, i.e. all Brick
# tag names, keeping only the fragment after '#'.
res = brick_tag_g.query("""
select ?s where {
    ?s <http://www.w3.org/2000/01/rdf-schema#subClassOf>+
       <https://brickschema.org/schema/1.0.1/BrickFrame#Tag> .
}""")
brick_tags = [row[0].split('#')[-1] for row in res]
# Haystack -> Brick mapping table.
with open('brick/hb_map.json', 'r') as fp:
    hb_map = json.load(fp)