def load(self, source):
    """Load the resources in ``source`` and relate them via a sidecar file.

    The reader is selected from the extension of ``source``:

    * ``.arches`` -- ``ArchesReader``; the file is validated first.
    * ``.json``   -- ``JsonReader``; the resources are flagged as Arches JSON.

    The parsed resources are handed to ``self.resource_list_to_entities``.
    If a file with the same base name and a ``.relations`` extension exists,
    it is read as pipe-delimited CSV and each row is passed to
    ``self.relate_resources`` together with the ``'legacyid_to_entityid'``
    entry of the conversion results.

    Args:
        source: Path of the resource file to load.

    Returns:
        None.

    Raises:
        ValueError: If the extension of ``source`` is neither ``.arches``
            nor ``.json`` (previously this surfaced later as an unbound
            ``reader`` name).
    """
    # NOTE: prints use a single parenthesised argument, which produces the
    # same output under the Python 2 print statement and the print function.
    file_name, file_format = os.path.splitext(source)
    archesjson = False

    if file_format == '.arches':
        reader = ArchesReader()
        print('\nVALIDATING ARCHES FILE ({0})'.format(source))
        reader.validate_file(source)
    elif file_format == '.json':
        archesjson = True
        reader = JsonReader()
    else:
        # Fail early and clearly instead of hitting an unbound `reader`.
        raise ValueError(
            "Unsupported resource file format {0!r} for {1!r}; "
            "expected '.arches' or '.json'".format(file_format, source))

    start = time()
    resources = reader.load_file(source)
    print('\nLOADING RESOURCES ({0})'.format(source))
    elapsed = time() - start
    print('time to parse {0} resources = {1}'.format(file_name, elapsed))

    results = self.resource_list_to_entities(resources, archesjson)

    # Relationships live next to the source file: <base name>.relations
    relationships_file = file_name + '.relations'
    related_resource_records = []
    if os.path.exists(relationships_file):
        # The context manager guarantees the handle is closed even if
        # relate_resources raises part-way through the file.
        with open(relationships_file, 'r') as relations_handle:
            relationships = csv.DictReader(relations_handle, delimiter='|')
            for relationship in relationships:
                related_resource_records.append(
                    self.relate_resources(
                        relationship,
                        results['legacyid_to_entityid'],
                        archesjson))
    else:
        print('No relationship file')
def load(self, source, appending=False):
    """Load the resources in ``source`` and relate them via a sidecar file.

    The reader is selected from the extension of ``source``:

    * ``.shp``    -- ``ShapeReader``.
    * ``.arches`` -- ``ArchesReader`` (file validation is currently
      commented out, although the banner is still printed).
    * ``.json``   -- ``JsonReader``; the file is validated and the resources
      are flagged as Arches JSON.
    * ``.jsonl``  -- one JSON document per line, no validation.  Each line is
      converted on its own through ``self.resource_list_to_entities`` and the
      method returns early with a count; relationship files are not
      processed for this format.

    For the other formats the parsed resources are handed to
    ``self.resource_list_to_entities`` in a single call.  If a file with the
    same base name and a ``.relations`` extension exists, it is read as CSV
    (comma- or pipe-delimited, detected from its first line) and each row is
    passed to ``self.relate_resources``.

    Args:
        source: Path of the resource file to load.
        appending: Forwarded to ``self.resource_list_to_entities`` for every
            format except ``.jsonl``.

    Returns:
        ``{"count": <number of lines loaded>}`` for ``.jsonl`` sources;
        otherwise whatever ``self.resource_list_to_entities`` returned.

    Raises:
        ValueError: If the extension of ``source`` is not one of the
            supported formats (previously this surfaced later as an unbound
            ``reader`` name).
    """
    # NOTE: prints use a single parenthesised argument, which produces the
    # same output under the Python 2 print statement and the print function.
    file_name, file_format = os.path.splitext(source)
    source_basename = os.path.basename(source)
    archesjson = False

    if file_format == '.shp':
        reader = ShapeReader()
    elif file_format == '.arches':
        reader = ArchesReader()
        print('\nVALIDATING ARCHES FILE ({0})'.format(source))
        # reader.validate_file(source)
    elif file_format == '.json':
        archesjson = True
        reader = JsonReader()
        print('\nVALIDATING JSON FILE ({0})'.format(source))
        reader.validate_file(source)
    elif file_format == '.jsonl':
        print('\nNO VALIDATION USED ON JSONL FILE ({0})'.format(source))
        # One identifier shared by every resource loaded from this file.
        # NOTE(review): the id uses microsecond but not second -- confirm
        # that is intended before changing the format.
        d = datetime.datetime.now()
        load_id = 'LOADID:{0}-{1}-{2}-{3}-{4}-{5}'.format(
            d.year, d.month, d.day, d.hour, d.minute, d.microsecond)
        loaded_ct = 0
        with open(source, "rb") as openf:
            # Stream the file instead of materialising every line up front.
            for line in openf:
                if not line.strip():
                    # Blank lines are not JSON documents; skip them rather
                    # than letting json.loads abort the whole load.
                    continue
                resource = json.loads(line)
                # NOTE(review): `appending` is always passed as False on
                # this path, regardless of the argument -- confirm intended.
                self.resource_list_to_entities(
                    [resource], True, False,
                    filename=source_basename, load_id=load_id)
                loaded_ct += 1
        return {"count": loaded_ct}
    else:
        # Fail early and clearly instead of hitting an unbound `reader`.
        raise ValueError(
            "Unsupported resource file format {0!r} for {1!r}; expected "
            "'.shp', '.arches', '.json' or '.jsonl'".format(
                file_format, source))

    start = time()
    resources = reader.load_file(source)
    print('\nLOADING RESOURCES ({0})'.format(source))
    elapsed = time() - start
    print('time to parse {0} resources = {1}'.format(file_name, elapsed))

    results = self.resource_list_to_entities(
        resources, archesjson, appending, filename=source_basename)

    # Relationships live next to the source file: <base name>.relations
    relationships_file = file_name + '.relations'
    related_resource_records = []
    if os.path.exists(relationships_file):
        # Open the file once: sniff the delimiter from the first line, then
        # rewind so DictReader still sees that line as the header row.
        with open(relationships_file, 'r') as openf:
            header = openf.readline()
            # Comma wins whenever it is present; pipe is used only when the
            # first line has a pipe and no comma.  An empty file falls back
            # to comma and simply yields no rows.
            if "," not in header and "|" in header:
                delim = "|"
            else:
                delim = ","
            openf.seek(0)
            relationships = csv.DictReader(openf, delimiter=delim)
            for relationship in relationships:
                related_resource_records.append(
                    self.relate_resources(
                        relationship,
                        results['legacyid_to_entityid'],
                        archesjson))
    else:
        print('No relationship file')

    return results