def main():
    """Run the FixAPI checker over every record, cleaning up where flagged."""
    base_dir, dry_run = process_input()

    for record, json_path in get_all_records(base_dir):
        print("\n\n******************\n")
        print("processing:", record.oil_id, record.metadata.name)

        fixer = FixAPI(record)
        needs_fix, message = fixer.check()

        # check() is tri-state: True (fixable), False (not fixable), other
        if needs_fix is True:
            print(message)
            print("Cleaning up!")
            fixer.cleanup()
            print("API is now:", record.metadata.API)
            if dry_run:
                print("Nothing saved")
            else:
                print("Saving out:", json_path)
                record.to_file(json_path)
        elif needs_fix is False:
            print(message)
            print("It's a:", record.metadata.product_type)
            print("Densities are:",
                  record.sub_samples[0].physical_properties.densities)
        else:
            print(message)
def run_through():
    """Attach location coordinates from ADIOS_Locations.csv to each record.

    Reads the pipe-delimited CSV (skipping its header row), then loads
    each oil JSON file by ID, sets ``metadata.location_coordinates``
    and writes the record back out (unless this is a dry run).
    """
    base_dir, dry_run = process_input(USAGE=USAGE)

    # read the data file -- pipe-delimited: ID | ? | type | coords | ?
    # explicit encoding avoids platform-dependent default decoding
    data = {}
    with open("ADIOS_Locations.csv", encoding="utf-8") as infile:
        infile.readline()  # skip the header row
        for line in infile:
            if not line.strip():
                continue
            fields = [s.strip() for s in line.strip().split("|")]
            # renamed from ``ID``/``type`` -- ``type`` shadowed the builtin
            rec_id, __, loc_type, coords, __ = fields
            # literal_eval is safe here: it only parses Python literals
            data[rec_id] = (loc_type, literal_eval(coords))

    for rec_id, coord_info in data.items():
        # records live in subdirs named by the first two chars of the ID
        pth = Path(base_dir) / rec_id[:2] / (rec_id + ".json")
        oil = Oil.from_file(pth)
        name = oil.metadata.name
        oil.metadata.location_coordinates = LocationCoordinates(*coord_info)
        print("\nProcessing:", rec_id, name)
        print("adding:", coord_info)
        if not dry_run:
            print("Saving out:", pth)
            oil.to_file(pth)
        else:
            print("Dry Run: Nothing saved")
def main():
    """Append NEW_COMMENT to Jokuty-referenced records from target years.

    Only records whose reference year is in the target set and whose
    reference text mentions "Jokuty" are touched, and only when the
    note is not already present (so the script is idempotent).
    """
    base_dir, dry_run = ads.process_input(USAGE)

    count = 0
    # NOTE(review): the original set literal listed 1990 twice --
    # possibly another year (1991?) was intended; confirm with the author
    target_years = {1990, 1992, 1996, 1999}
    for rec, pth in ads.get_all_records(base_dir):
        reference = rec.metadata.reference
        if (reference.year in target_years
                and "Jokuty" in reference.reference):
            # skip records that already carry the note
            if NEW_COMMENT not in rec.metadata.comments:
                count += 1
                if rec.metadata.comments:
                    rec.metadata.comments = "\n\n".join(
                        (rec.metadata.comments, NEW_COMMENT))
                else:
                    rec.metadata.comments = NEW_COMMENT
                print("Adding note to:", rec.oil_id)
                print(reference.reference)
                if not dry_run:
                    print("Saving out:", pth)
                    rec.to_file(pth)
                else:
                    print("Nothing saved")
    print(f"{count} records changed")
def main():
    """Move IFT notes on every subsample of every record, then save."""
    base_dir, dry_run = process_input()

    for rec, pth in get_all_records(base_dir):
        print("\n\n******************\n")
        print("processing:", rec.oil_id, rec.metadata.name)

        # loop through the subsamples, handling all three IFT variants
        for sample in rec.sub_samples:
            props = sample.physical_properties
            for tension in (props.interfacial_tension_water,
                            props.interfacial_tension_air,
                            props.interfacial_tension_seawater):
                move_ift_note(tension)

        if dry_run:
            print("Nothing saved")
        else:
            print("Saving out:", pth)
            rec.to_file(pth)
def run_through():
    """Replace ``orig_location`` with ``new_location`` on matching records."""
    base_dir, dry_run = process_input(USAGE=USAGE)

    for oil, pth in get_all_records(base_dir):
        # case-insensitive match against the module-level orig_location
        if oil.metadata.location.lower() != orig_location:
            continue
        print("\nProcessing:", oil.oil_id, oil.metadata.name)
        print("changing location to:", new_location)
        oil.metadata.location = new_location
        if not dry_run:
            print("Saving out:", pth)
            oil.to_file(pth)
        else:
            print("Dry Run: Nothing saved")
def main():
    """Export distillation method / fraction-recovered data to a TSV file.

    Writes one row per record to ``adios_distillation_data.csv``
    (tab-separated despite the .csv extension, matching the original).
    """
    base_dir, dry_run = ads.process_input(USAGE)

    out_name = "adios_distillation_data.csv"
    print("writing:", out_name)
    # with-block guarantees the file is closed (the original leaked the
    # handle and relied on interpreter exit to flush it)
    with open(out_name, 'w', encoding="utf-8") as outfile:
        outfile.write(
            "Oil ID\tName\tReference\tdistillation_method\t"
            "fraction_recovered\n")
        for rec, pth in ads.get_all_records(base_dir):
            ID = rec.oil_id
            name = rec.metadata.name
            reference = rec.metadata.reference.reference
            dist_data = rec.sub_samples[0].distillation_data
            outfile.write(
                f'{ID} \t"{name}" \t"{reference}" '
                f'\t{dist_data.method} \t{dist_data.fraction_recovered}\n'
            )
def add_the_labels():
    """Attach suggested labels to every record, logging them to labels.csv.

    If "replace" appears on the command line, existing labels are
    replaced by the suggestions; otherwise the suggestions are merged
    with the record's current labels.
    """
    # Look for "replace" on the command line
    try:
        sys.argv.remove("replace")
        replace = True
    except ValueError:
        replace = False

    base_dir, dry_run = process_input(USAGE)

    with open("labels.csv", 'w') as outfile:
        outfile.write("ID, Name, Product Type, Labels\n")
        for oil, pth in get_all_records(base_dir):
            # renamed from ``id`` -- it shadowed the builtin
            oil_id = oil.oil_id
            name = oil.metadata.name
            pt = oil.metadata.product_type
            print("\nFor Oil:", oil_id, name)
            try:
                labels = get_suggested_labels(oil)
                print(labels)
                if not replace:
                    labels.update(oil.metadata.labels)
                outfile.write(
                    f"{oil_id}, {name}, {pt}, {str(labels).strip('{}')}\n")
                if not dry_run:
                    print("Saving out:", pth)
                    oil.metadata.labels = list(labels)
                    oil.to_file(pth)
                else:
                    print("Nothing saved")
            except Exception as err:
                # if anything goes wrong, we won't add any labels.
                # Was a bare ``except:`` which also swallowed
                # KeyboardInterrupt/SystemExit and hid the actual error.
                print("Something went wrong -- no labels")
                print(err)
    # was "labels.txt" -- the file actually written is labels.csv
    print("output written to: labels.csv")
def run_through():
    """Load and re-save every JSON record under base_dir (normalizes files)."""
    base_dir, dry_run = process_input(USAGE=USAGE)
    print("Processing JSON files in:", base_dir)

    found_any = False
    for pth in sorted(base_dir.rglob("*.json")):
        found_any = True
        print("processing:", pth)
        try:
            oil = Oil.from_file(pth)
        except Exception as ex:
            # report which file broke, then re-raise so the run stops
            print("Something went wrong loading:", pth)
            print(ex)
            raise
        if dry_run:
            print("Dry Run: Nothing saved")
        else:
            print("Saving out:", pth)
            oil.to_file(pth)

    if not found_any:
        print("No files were found in:", base_dir)
def run_through():
    """Set the standard ECCC reference text on every 'EC' record."""
    base_dir, dry_run = process_input(USAGE=USAGE)

    new_ref = ('Environment and Climate Change Canada, Environment '
               'Canada Crude Oil and Petroleum Product Database, '
               'Environment and Climate Change Canada, 2021.\n\n'
               'url: https://open.canada.ca/data/en/dataset/'
               '53c38f91-35c8-49a6-a437-b311703db8c5')

    for oil, pth in get_all_records(base_dir):
        oil_id = oil.oil_id
        # only Environment Canada records carry the 'EC' prefix
        if oil_id[:2] != 'EC':
            continue
        print("\nProcessing:", oil_id, oil.metadata.name)
        print("changing reference to:", new_ref)
        oil.metadata.reference.reference = new_ref
        if dry_run:
            print("Dry Run: Nothing saved")
        else:
            print("Saving out:", pth)
            oil.to_file(pth)
def add_them(data):
    """Add alternate names from *data* (spreadsheet rows keyed by oil ID).

    Records missing from the spreadsheet are logged to
    missing_records.csv; records whose primary names disagree are
    logged to name_mismatch_records.csv.

    :param data: mapping of oil ID -> spreadsheet row, where row[0] is
        the record name and row[10] a comma-separated list of alternate
        names -- assumed from the indexing here; confirm against the
        spreadsheet loader.
    """
    base_dir, dry_run = process_input(USAGE)

    # with-blocks ensure both log files are closed and flushed
    # (the original never closed either handle)
    with open("missing_records.csv", 'w', encoding="utf-8") as missing, \
         open("name_mismatch_records.csv", 'w',
              encoding="utf-8") as name_mismatch:
        for oil, pth in get_all_records(base_dir):
            ID = oil.oil_id
            try:
                row = data[ID]
            except KeyError:
                print(f"{ID} not in the spreadsheet")
                missing.write(",".join([ID, oil.metadata.name]))
                # BUG FIX: this newline used to be written to
                # name_mismatch, corrupting that file and leaving
                # missing_records.csv as one long line
                missing.write("\n")
                continue
            name = row[0]
            print("Processing:", ID)
            if name == oil.metadata.name:
                anames = [n.strip() for n in row[10].strip().split(",")]
                # clean out names that merely repeat the primary name
                anames = [n for n in anames if n.lower() not in name.lower()]
                if anames:
                    oil.metadata.alternate_names = anames
                    print("Adding:", oil.oil_id, oil.metadata.name,
                          oil.metadata.alternate_names)
                    if not dry_run:
                        print("Saving out:", pth)
                        oil.to_file(pth)
                    else:
                        print("Nothing saved")
            else:
                print("Name doesn't match!", name, oil.metadata.name)
                name_mismatch.write(",".join([ID, oil.metadata.name, name]))
                name_mismatch.write("\n")
#!/usr/bin/env python
"""
This script looks at the distillation type, and sets the unit_type properly
"""
from adios_db.models.oil.cleanup.distillation import FixCutUnitType

import adios_db.scripting as ads

json_data_dir, dry_run = ads.process_input()

# Walk every record, cleaning up cut unit types wherever the checker
# flags them, and saving the result unless this is a dry run.
for rec, pth in ads.get_all_records(json_data_dir):
    print("processing:", rec.oil_id)
    fixer = FixCutUnitType(rec)
    needs_fix, msg = fixer.check()
    if needs_fix is True:
        print(msg)
        print("Cleaning up!")
        fixer.cleanup()
        if dry_run:
            print("Nothing saved")
        else:
            print("Saving out:", pth)
            rec.to_file(pth)
#!/usr/bin/env python """ Example script for how to query the database and export a subset of the data to a CSV file As is, this exports viscosity data for crude oils """ # the scripting module has a few utilities and the # core Python objects you may need for scripting # calling it "adb" for easy typing import adios_db.scripting as adb base_dir, dry_run = adb.process_input() # create a new text file for output outfile = open("example_data.csv", 'w', encoding="utf-8") # write the header row: outfile.write('Name, ID, Dynamic Viscosity, unit, reference temp, unit\n') # Loop through all the JSON files in the given directory: for oil, path in adb.get_all_records(base_dir): print("\n**** ", oil.metadata.name) # select the desired product types: if oil.metadata.product_type in { "Crude Oil NOS", "Condensate", "Tight Oil", }: