示例#1
0
# Helper functions

def adder(x, y):
    """Return ``x + y``.

    Defined with ``def`` rather than as a lambda bound to a name so that the
    function carries its own name in tracebacks and can hold a docstring.
    Any pair of operands supporting ``+`` is accepted.
    """
    return x + y

def normalize_phrase(phrase):
    """Return *phrase* lower-cased, or ``''`` when *phrase* is NaN.

    Args:
        phrase: A string (or str subclass), or a NaN value -- presumably
            what a missing cell looks like after loading the metadata;
            confirm against the caller.

    Returns:
        The lower-cased string, or the empty string for NaN.

    Raises:
        AssertionError: If *phrase* is a non-string that is not NaN.
        TypeError: Propagated from ``np.isnan`` for inputs it cannot handle.
    """
    # isinstance also accepts str subclasses, which `type(x) == str` rejects.
    if isinstance(phrase, str):
        return phrase.lower()
    if np.isnan(phrase):
        return ''
    # Raised explicitly instead of `assert False`: assert statements are
    # stripped under `python -O`, which would make this silently return None.
    raise AssertionError(
        'normalize_phrase expects a str or NaN, got {0!r}'.format(phrase))

# Command-line interface: -b selects the building whose files are loaded.
argparser.add_argument(
    '-b',
    dest='building',
    type=str,
    help='Target building name',
)
args = argparser.parse_args()
building = args.building


# Transform parsed labels used in Scrabble for Oracle.

# Raw metadata table of the building, indexed by its SourceIdentifier column.
df = pd.read_csv('{0}_rawmetadata.csv'.format(building))
df = df.set_index('SourceIdentifier')

# Character-level label dictionary stored next to the raw metadata.
with open('{0}_char_label_dict.json'.format(building), 'r') as fp:
    char_label_dict = json.load(fp)

# Starts out empty; presumably filled further down the script.
parse_dict = {}
示例#2
0
from jasonhelper import argparser

# Command-line options of the experiment script.
# NOTE(review): 'slist' and 'ilist' are type names understood by the
# jasonhelper argparser -- presumably "list of str" and "list of int";
# confirm against jasonhelper.
argparser.add_argument('-bl', dest='buildings', type='slist')
argparser.add_argument('-nl', dest='sample_nums', type='ilist')
argparser.add_argument('-t', dest='target_building', type=str)
argparser.add_argument('-step', dest='step_num', type=int)
argparser.add_argument('-iter', dest='iter_num', type=int)
示例#3
0
            sentence = ''.join(
                [row[0] for row in fullparsing['VendorGivenName']])
            rawpoint.metadata = {'VendorGivenName': sentence}
            rawpoint.save()

    print('Finished adding full parsing')


# add tagsets
def parse_tagsets(building):
    """Store the ground-truth tagsets of every point of *building*.

    Reads ``../groundtruth/<building>_tagsets.json`` (a JSON object whose
    items are iterated as ``srcid -> tagsets``). For each entry the
    ``LabeledMetadata`` document matching ``srcid`` and ``building`` is
    obtained through ``upsert_one``, its ``tagsets`` and ``point_tagset``
    fields are set, and the document is saved.

    Args:
        building: Building name used in the file name and in the query.
    """
    tagsets_path = '../groundtruth/{0}_tagsets.json'.format(building)
    with open(tagsets_path, 'r') as tagsets_file:
        tagsets_by_srcid = json.load(tagsets_file)

    for srcid, tagsets in tagsets_by_srcid.items():
        matching = LabeledMetadata.objects(srcid=srcid, building=building)
        point = matching.upsert_one(srcid=srcid, building=building)
        point.tagsets = tagsets
        # A falsy selection result is stored as the literal string 'none'.
        point.point_tagset = sel_point_tagset(tagsets) or 'none'
        point.save()


if __name__ == '__main__':
    # Script entry point: -b (required) names the building to process.
    argparser.add_argument('-b', type=str, dest='building', required=True)
    args = argparser.parse_args()
    building = args.building

    # Only buildings listed in CMU_BUILDINGS are handled here: the tagsets
    # are stored first, then the full parsing is run with
    # write_rawmetadata=True.
    if building in CMU_BUILDINGS:
        parse_tagsets(building)
        parse_fullparsing(building, write_rawmetadata=True)
示例#4
0
def remove_invalid_srcids(building):
    """Delete the points that are listed as invalid for *building*.

    ``config/invalid_srcids.json`` is loaded and looked up by building name.
    When the building has no entry nothing happens. Otherwise the number of
    listed srcids is printed and, for each of them, the matching
    ``RawMetadata`` and ``LabeledMetadata`` documents are deleted.

    Args:
        building: Key to look up in the invalid-srcid configuration.

    Returns:
        Always ``None``.
    """
    with open('config/invalid_srcids.json', 'r') as config_file:
        invalid_by_building = json.load(config_file)

    if building in invalid_by_building:
        stale_srcids = invalid_by_building[building]
        # The count is reported before the deletions are issued.
        print('{0} invalid points are removed.'.format(len(stale_srcids)))
        # NOTE(review): the deletions filter on srcid only, not on building --
        # confirm that srcids are unique across buildings.
        for stale in stale_srcids:
            RawMetadata.objects(srcid=stale).delete()
            LabeledMetadata.objects(srcid=stale).delete()
    return None


if __name__ == '__main__':
    # Script entry point: -b (required) names the building to load.
    argparser.add_argument('-b', type=str, dest='building', required=True)
    # -top is stored as args.topclass_flag (default False). It is not read in
    # the visible part of the script.
    # NOTE(review): 'bool' is passed as a type *name* -- presumably a custom
    # type registered on this argparser; confirm.
    argparser.add_argument('-top',
                           type='bool',
                           dest='topclass_flag',
                           default=False)
    # add raw metadata
    args = argparser.parse_args()
    building = args.building

    if building == 'uva_cse':
        # The UVA CSE building has a dedicated loader; the script exits
        # right after it finishes.
        load_uva_building(building)
        print('UVA CSE Done')
        sys.exit()
    elif building in UCB_BUILDINGS:
        # UCB buildings: raw labels are extracted first. basedir is
        # presumably where the ground-truth files are read from afterwards
        # (not visible here).
        extract_raw_ucb_labels()
        basedir = './groundtruth/'
示例#5
0
    if max_score < 0.75:
        return None
    # TODO: Selection mechanism needs to be smarter.
    #       Currently mapping
    #       {'pressure', 'filter', 'discharge', 'his', 'weatherPoint', 'air', 'sp'}
    #       ->
    #       "Discharge_Air"
    cand = ''
    for tagset, score in score_dict.items():
        if score == max_score:
            if len(cand.split('_')) < len(tagset.split('_')):
                cand = tagset
    return cand


# Command-line interface: -src names the Haystack instance to read.
argparser.add_argument(
    '-src',
    dest='srcfile',
    type=str,
    help='Haystack instance in JSON or Zinc',
)
args = argparser.parse_args()


################ Init base information ##############

# Load Brick
brick_tag_g = rdflib.Graph()
brick_tag_g.parse('brick/BrickTag.ttl', format='turtle')
# Select every subject reachable from BrickFrame#Tag through one or more
# rdfs:subClassOf steps (the `+` property path).
res = brick_tag_g.query("""
select ?s where {
?s <http://www.w3.org/2000/01/rdf-schema#subClassOf>+ <https://brickschema.org/schema/1.0.1/BrickFrame#Tag> .
}""")
# Keep only the fragment after the last '#' of each result URI.
brick_tags = [row[0].rsplit('#', 1)[-1] for row in res]

# Mapping loaded from brick/hb_map.json.
with open('brick/hb_map.json', 'r') as fp:
    hb_map = json.load(fp)