def status(use_screen=True, verbose=False):
    """Return a dict containing:
    - changes (dict of new_version, added_cols, removed_cols, added_lines, removed_lines,
      changed_lines built from daff)
    - added local (sheet names)
    - removed local (sheet names)
    - added remote (sheet names)
    - removed remote (sheet names)
    If use_screen, print the status of local sheets vs. remote sheets."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Get the sets of changes
    tracked_sheets = get_tracked_sheets(cogs_dir)
    # Get rid of ignored sheets
    tracked_sheets = {
        x: y for x, y in tracked_sheets.items() if y.get("Ignore") != "True"
    }
    renamed = get_renamed_sheets(cogs_dir)
    changes = get_changes(cogs_dir, tracked_sheets, renamed)

    # Get a count of all changes
    change_count = set(
        list(changes["diffs"].keys())
        + changes["added local"]
        + changes["added remote"]
        + changes["removed local"]
        + changes["removed remote"]
    )
    if len(change_count) == 0 and len(renamed) == 0:
        return None
    if use_screen:
        print_status(changes, renamed, tracked_sheets)
    return changes
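
# Usage sketch (hypothetical; assumes a valid .cogs/ project in the working
# directory and that this module's functions are importable):
#
#     changes = status(use_screen=False)
#     if changes is None:
#         print("no changes")
#     else:
#         print(sorted(changes["diffs"].keys()))  # sheets with content diffs
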
def clear(keyword, on_sheets=None, verbose=False):
    """Remove formats, notes, and/or data validation rules from one or more sheets."""
    cogs_dir = validate_cogs_project()
    set_logging(verbose)

    # Validate sheets
    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]
    if not on_sheets:
        # If no sheet was supplied, clear from all
        on_sheets = [x for x in tracked_sheets.keys() if x not in ignore]
    untracked = []
    # Iterate over a copy so that removing ignored sheets doesn't skip entries
    for st in list(on_sheets):
        if st not in tracked_sheets.keys():
            untracked.append(st)
        if st in ignore:
            logging.error(f"Cannot clear from ignored sheet '{st}' - this will be ignored")
            on_sheets.remove(st)
    if untracked:
        raise ClearError(
            "The following sheet(s) are not part of this project: " + ", ".join(untracked)
        )

    if keyword == "formats":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
    elif keyword == "notes":
        for st in on_sheets:
            clear_notes(cogs_dir, st)
    elif keyword == "validation":
        for st in on_sheets:
            clear_data_validation(cogs_dir, st)
    elif keyword == "all":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
            clear_notes(cogs_dir, st)
            clear_data_validation(cogs_dir, st)
    else:
        raise ClearError("Unknown keyword: " + keyword)
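
# Usage sketch (hypothetical sheet title):
#
#     clear("formats", on_sheets=["Sheet1"])  # remove formats from one sheet
#     clear("all")  # remove formats, notes & data validation from all sheets
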
def share(email, role, verbose=False):
    """Share the project spreadsheet with email addresses as reader, writer, or owner."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    config = get_config(cogs_dir)
    gc = get_client_from_config(config)
    spreadsheet = gc.open_by_key(config["Spreadsheet ID"])
    title = spreadsheet.title
    if role not in ("owner", "reader", "writer"):
        raise RuntimeError("Unknown role passed to `share`: " + str(role))
    share_spreadsheet(title, spreadsheet, email, role)
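
# Usage sketch (hypothetical email address):
#
#     share("collaborator@example.com", "writer")
#     # role must be one of "reader", "writer", or "owner"
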
def ls(verbose=False):
    """Return a list of [sheet, path] pairs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]
    sheet_details = [["Tracked:"]]
    for sheet, details in tracked_sheets.items():
        if sheet in ignore:
            continue
        sheet_details.append([" - " + sheet, "(" + details["Path"] + ")"])
    if ignore:
        sheet_details.append([])
        sheet_details.append(["Ignored:"])
        for i in ignore:
            sheet_details.append([" - " + i])
    return sheet_details
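
# Usage sketch; `ls` returns rows rather than printing them, so a caller
# might do:
#
#     for row in ls():
#         print(" ".join(row))
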
def apply(paths, verbose=False):
    """Apply a table to the spreadsheet. The type of table to 'apply' is based on the
    headers: standardized messages or data validation."""
    cogs_dir = validate_cogs_project()
    set_logging(verbose)
    message_tables = []
    data_validation_tables = []
    for p in paths:
        if p.endswith("csv"):
            sep = ","
        else:
            sep = "\t"
        with open(p, "r") as f:
            # Get headers and rows
            reader = csv.DictReader(f, delimiter=sep)
            headers = [x.lower() for x in reader.fieldnames]
            rows = []
            for r in reader:
                rows.append({k.lower(): v for k, v in r.items()})

        # Determine type of table
        if headers == data_validation_headers:
            data_validation_tables.append(rows)
        elif "table" in headers and "cell" in headers:
            for h in headers:
                if h not in message_headers:
                    raise ApplyError(f"The headers in table {p} are not valid for apply")
            message_tables.append(rows)
        else:
            raise ApplyError(f"The headers in table {p} are not valid for apply")

    if message_tables:
        apply_messages(cogs_dir, message_tables)
    if data_validation_tables:
        apply_data_validation(cogs_dir, data_validation_tables)
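
# Usage sketch (hypothetical file names; each table must carry either the
# data-validation headers or message headers including "table" and "cell"):
#
#     apply(["problems.tsv", "validation.tsv"])
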
def diff(paths=None, use_screen=True, verbose=False):
    """Return a dict of sheet title to daff diff lines. If no paths are provided, diff over
    all sheets in the project. If use_screen, display an interactive curses screen with the
    diffs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)
    tracked_paths = [details["Path"] for details in sheets.values()]
    if paths:
        # Update sheets to diff
        for p in paths:
            if p not in tracked_paths:
                raise DiffError(f"sheet '{p}' is not part of the current project")
        sheets = {
            sheet_title: details
            for sheet_title, details in sheets.items()
            if details["Path"] in paths
        }

    diffs = {}
    for sheet_title, details in sheets.items():
        cached = get_cached_path(cogs_dir, sheet_title)
        local = details["Path"]
        if os.path.exists(local) and os.path.exists(cached):
            # Consider remote (cached) the old version to diff off of
            sheet_diff = get_diff(cached, local)
            diffs[sheet_title] = sheet_diff
    if not diffs:
        return None
    if use_screen:
        return display_diff(cogs_dir, diffs, sheets)
    return diffs
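
# Usage sketch (hypothetical path; returns daff lines instead of opening the
# curses screen when use_screen=False):
#
#     diffs = diff(paths=["tables/sheet1.tsv"], use_screen=False)
#     if diffs:
#         for title, lines in diffs.items():
#             print(title, len(lines))
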
def delete(verbose=False):
    """Read COGS configuration and delete the spreadsheet corresponding to the spreadsheet
    ID. Remove .cogs directory."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    config = get_config(cogs_dir)

    # Get a client to perform Sheet actions
    gc = get_client_from_config(config)

    # Delete the Sheet
    title = config["Title"]
    cwd = os.getcwd()
    logging.info(f"Removing COGS project '{title}' from {cwd}")
    try:
        ssid = config["Spreadsheet ID"]
        gc.del_spreadsheet(ssid)
    except gspread.exceptions.APIError as e:
        raise DeleteError(
            f"Unable to delete spreadsheet '{title}'\nCAUSE: {e.response.text}"
        )
    logging.info(f"successfully deleted Google Sheet '{title}' ({ssid})")

    # Remove the COGS data
    if os.path.exists(cogs_dir):
        shutil.rmtree(cogs_dir)
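
# Usage sketch; note this is destructive: it deletes the remote spreadsheet
# and removes the local .cogs/ directory:
#
#     delete()
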
def add_all(verbose=False):
    """Begin tracking all ignored sheets, assigning each one a local path derived from
    its title. This updates sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)
    sheet_lines = []
    for sheet_title, details in sheets.items():
        if details.get("Ignore") == "True":
            path = re.sub(r"[^A-Za-z0-9]+", "_", sheet_title.lower()) + ".tsv"
            if os.path.exists(path):
                # Avoid clobbering an existing file by appending a timestamp
                now = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = re.sub(r"[^A-Za-z0-9]+", "_", sheet_title.lower()) + f"_{now}.tsv"
            details["Path"] = path
            logging.info(
                f"Adding ignored sheet '{sheet_title}' to tracked sheets with path '{path}'"
            )
            del details["Ignore"]
        details["Title"] = sheet_title
        sheet_lines.append(details)
    update_sheet(cogs_dir, sheet_lines, [])
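
# Usage sketch; moves every ignored remote sheet into tracking:
#
#     add_all(verbose=True)
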
def rm(paths, keep=False, verbose=False):
    """Remove a table (TSV or CSV) from the COGS project. This updates sheet.tsv and
    deletes the corresponding cached file."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Make sure the sheets exist
    sheets = get_tracked_sheets(cogs_dir)
    path_to_sheet = {
        os.path.abspath(details["Path"]): sheet_title
        for sheet_title, details in sheets.items()
    }

    # Check for either untracked or ignored sheets in provided paths
    ignore = [x for x, y in sheets.items() if y.get("Ignore") == "True"]
    untracked = []
    ignored = []
    for p in paths:
        abspath = os.path.abspath(p)
        if abspath not in path_to_sheet:
            untracked.append(p)
        elif path_to_sheet[abspath] in ignore:
            ignored.append(p)
    if untracked:
        raise RmError(f"unable to remove untracked file(s): {', '.join(untracked)}.")
    if ignored:
        raise RmError(f"unable to remove ignored file(s): {', '.join(ignored)}")

    sheets_to_remove = {
        title: sheet for title, sheet in sheets.items() if sheet["Path"] in paths
    }

    # Make sure we are not deleting the last sheet, since a Google spreadsheet must
    # have at least one sheet
    if len(sheets) - len(sheets_to_remove) == 0:
        raise RmError(
            f"unable to remove {len(sheets_to_remove)} tracked sheet(s) - "
            "the spreadsheet must have at least one sheet."
        )

    # Maybe remove local copies
    if not keep:
        for p in paths:
            if os.path.exists(p):
                os.remove(p)

    # Remove the cached copies
    for sheet_title in sheets_to_remove.keys():
        cached_path = get_cached_path(cogs_dir, sheet_title)
        if os.path.exists(cached_path):
            os.remove(cached_path)

    # Update sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "w") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        writer.writeheader()
        for title, sheet in sheets.items():
            if title not in sheets_to_remove.keys():
                sheet["Title"] = title
                writer.writerow(sheet)

    # Update formats and notes
    sheet_formats = get_sheet_formats(cogs_dir)
    update_format(cogs_dir, sheet_formats, sheets_to_remove.keys())
    sheet_notes = get_sheet_notes(cogs_dir)
    update_note(cogs_dir, sheet_notes, sheets_to_remove.keys())
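
# Usage sketch (hypothetical path); keep=True preserves the local copy while
# dropping the sheet from tracking:
#
#     rm(["tables/old_sheet.tsv"], keep=True)
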
def merge(verbose=False):
    """Copy cached sheets to their local paths."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    cached_sheets = get_cached_sheets(cogs_dir)
    tracked_sheets = get_tracked_sheets(cogs_dir)
    tracked_cached = [
        re.sub(r"[^A-Za-z0-9]+", "_", x.lower()) for x in tracked_sheets.keys()
    ]

    # Get the list of ignored sheet titles
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]

    renamed_sheets = get_renamed_sheets(cogs_dir)
    renamed_local = {
        old: details
        for old, details in renamed_sheets.items()
        if details["where"] == "local"
    }
    renamed_remote = {
        old: details
        for old, details in renamed_sheets.items()
        if old not in renamed_local
    }

    # Add new remotes to tracked cached
    for details in renamed_remote.values():
        tracked_cached.append(re.sub(r"[^A-Za-z0-9]+", "_", details["new"].lower()))
    remove_sheets = [s for s in cached_sheets if s not in tracked_cached]

    for sheet_title, details in tracked_sheets.items():
        if sheet_title in ignore or sheet_title in renamed_remote:
            continue
        cached_path = get_cached_path(cogs_dir, sheet_title)
        local_sheet = details["Path"]
        if os.path.exists(cached_path):
            logging.info(f"Writing '{sheet_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

    # Handle renamed remote files by replacing their cached copies and adding to sheet.tsv
    for old_title, details in renamed_remote.items():
        new_title = details["new"]
        cached_path = get_cached_path(cogs_dir, old_title)
        logging.info(
            f"Removing '{old_title}' from cached sheets and replacing with '{new_title}'"
        )
        if os.path.exists(cached_path):
            os.remove(cached_path)

        # Write new copy
        local_sheet = details["path"]
        cached_path = get_cached_path(cogs_dir, new_title)
        if os.path.exists(cached_path):
            logging.info(f"Writing '{new_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

        # Update sheet.tsv
        sheet_details = tracked_sheets[old_title]
        del tracked_sheets[old_title]
        sheet_details["Path"] = local_sheet
        tracked_sheets[new_title] = sheet_details

    for sheet_title in remove_sheets:
        logging.info(f"Removing '{sheet_title}' from cached sheets")
        os.remove(f"{cogs_dir}/tracked/{sheet_title}.tsv")

    with open(f"{cogs_dir}/renamed.tsv", "w") as f:
        for old_title, details in renamed_local.items():
            new_title = details["new"]
            path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{path}\tlocal\n")

    if renamed_remote:
        # We need to update sheet.tsv if anything was renamed remotely
        sheet_details = []
        for title, details in tracked_sheets.items():
            details["Title"] = title
            sheet_details.append(details)
        update_sheet(cogs_dir, sheet_details, [])
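
# Usage sketch; typically run after `fetch` so the cached copies overwrite
# the local files:
#
#     fetch()
#     merge()
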
def fetch(verbose=False):
    """Fetch all sheets from project spreadsheet to .cogs/ directory."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    config = get_config(cogs_dir)
    gc = get_client_from_config(config)
    spreadsheet = gc.open_by_key(config["Spreadsheet ID"])

    # Get the remote sheets from spreadsheet
    sheets = spreadsheet.worksheets()
    remote_sheets = get_remote_sheets(sheets)
    tracked_sheets = get_tracked_sheets(cogs_dir, include_no_id=True)
    id_to_title = {
        int(details["ID"]): sheet_title
        for sheet_title, details in tracked_sheets.items()
        if details.get("ID")
    }

    # Get details about renamed sheets
    renamed_local = get_renamed_sheets(cogs_dir)
    renamed_remote = {}

    # Format ID to format for cell formatting
    id_to_format = get_format_dict(cogs_dir)
    if id_to_format:
        # Format to format ID
        format_to_id = {
            json.dumps(v, sort_keys=True): k for k, v in id_to_format.items()
        }
        # Next ID for new formats
        format_ids = list(id_to_format.keys())
        format_ids.sort()
        next_fmt_id = int(format_ids[-1]) + 1
    else:
        format_to_id = {}
        next_fmt_id = 1

    # Export the sheets as TSV to .cogs/
    # We also collect the formatting and note data for each sheet during this step
    sheet_formats = {}
    sheet_notes = {}
    sheet_dv_rules = {}
    sheet_frozen = {}

    # Lines to add to sheet.tsv of sheets to ignore
    new_ignore = []
    for sheet in sheets:
        remote_title = sheet.title
        if (
            remote_title in tracked_sheets
            and tracked_sheets[remote_title].get("Ignore") == "True"
        ):
            logging.info(f"Skipping ignored sheet '{remote_title}'...")
            continue

        # Download the sheet as the renamed sheet if necessary
        if remote_title in renamed_local:
            st = renamed_local[remote_title]["new"]
            logging.info(f"Downloading sheet '{remote_title}' as {st} (renamed locally)")
        else:
            st = remote_title
            if sheet.id in id_to_title:
                local_title = id_to_title[sheet.id]
                if local_title != remote_title:
                    # The sheet title has been changed remotely
                    # This will be updated in tracking but the local sheet will remain
                    old_path = tracked_sheets[local_title]["Path"]
                    logging.warning(
                        f"Local sheet '{local_title}' has been renamed to '{st}' remotely:"
                        f"\n - '{local_title}' is removed from tracking and replaced with '{st}'"
                        f"\n - {old_path} will not be updated when running `cogs pull`"
                        f"\n - changes to {old_path} will not be pushed to the remote spreadsheet"
                    )
                    renamed_remote[local_title] = {
                        "new": st,
                        "path": re.sub(r"[^A-Za-z0-9]+", "_", st.lower()) + ".tsv",
                    }
                    logging.info(
                        f"Downloading sheet '{local_title}' as '{st}' (renamed remotely)"
                    )
                else:
                    logging.info(f"Downloading sheet '{st}'")
            else:
                # Add as an ignored sheet
                new_ignore.append({"ID": sheet.id, "Title": st, "Ignore": True})
                logging.info(f"Ignoring new remote sheet '{st}'")
                continue

        # Get frozen rows & columns
        sheet_frozen[st] = {
            "row": sheet.frozen_row_count,
            "col": sheet.frozen_col_count,
        }

        # Get the cells with format, value, and note from remote sheet
        cells = get_cell_data(cogs_dir, sheet)

        # Create a map of rule -> locs for data validation
        dv_rules = {}
        str_to_rule = {}
        for loc, cell_data in cells.items():
            if "data_validation" not in cell_data:
                continue
            data_validation = cell_data["data_validation"]
            condition = data_validation["condition"]
            bc = gf.BooleanCondition(condition, data_validation["value"])
            dv = gf.DataValidationRule(bc)
            if str(dv) not in str_to_rule:
                str_to_rule[str(dv)] = dv
            if str(dv) in dv_rules:
                locs = dv_rules[str(dv)]
            else:
                locs = []
            locs.append(loc)
            dv_rules[str(dv)] = locs

        # Aggregate by location and format for validate.tsv
        dv_rows = clean_data_validation_rules(dv_rules, str_to_rule)
        sheet_dv_rules[st] = dv_rows

        # Get the ending row & col that have values
        # Otherwise we end up with a bunch of empty rows/columns
        max_row = 0
        max_col = 0
        for c in cells.keys():
            row, col = gspread.utils.a1_to_rowcol(c)
            if row > max_row:
                max_row = row
            if col > max_col:
                max_col = col

        # Cell label to format dict
        cell_to_format = {
            cell: data["format"] for cell, data in cells.items() if "format" in data
        }

        # Create a cell to format ID dict based on the format dict for each cell
        cell_to_format_id = {}
        last_fmt = None
        cell_range_start = None
        cell_range_end = None
        for cell, fmt in cell_to_format.items():
            if not fmt:
                if last_fmt:
                    if not cell_range_end or cell_range_start == cell_range_end:
                        cell_to_format_id[cell_range_start] = last_fmt
                    else:
                        cell_to_format_id[f"{cell_range_start}:{cell_range_end}"] = last_fmt
                last_fmt = None
                cell_range_start = None
                cell_range_end = None
                continue
            key = json.dumps(fmt, sort_keys=True)
            if key in format_to_id:
                # Format already exists, assign that ID
                fmt_id = format_to_id[key]
            else:
                # Assign new ID
                fmt_id = next_fmt_id
                format_to_id[key] = fmt_id
                id_to_format[fmt_id] = fmt
                next_fmt_id += 1
            if last_fmt and fmt_id == last_fmt:
                # The last cell had a format and this cell's format is the same as the
                # last, so we increase the range
                cell_range_end = cell
            elif last_fmt and fmt_id != last_fmt:
                # The last cell had a format but it was different than the current format
                if cell_range_start == cell_range_end or not cell_range_end:
                    # Not a range, just a single cell (the previous cell)
                    cell_to_format_id[cell_range_start] = last_fmt
                else:
                    cell_to_format_id[f"{cell_range_start}:{cell_range_end}"] = last_fmt
                # Restarting a new range at this cell
                cell_range_start = cell
                cell_range_end = None
            else:
                # No last formatting to compare to, start a new range
                cell_range_start = cell
                cell_range_end = cell
            last_fmt = fmt_id
        if cell_to_format_id:
            sheet_formats[st] = cell_to_format_id

        # Add the cell to note
        cell_to_note = {
            cell: data["note"] for cell, data in cells.items() if "note" in data
        }
        if cell_to_note:
            sheet_notes[st] = cell_to_note

        # Write values to .cogs/tracked/{sheet title}.tsv
        cached_path = get_cached_path(cogs_dir, st)
        with open(cached_path, "w") as f:
            lines = sheet.get_all_values()
            writer = csv.writer(f, delimiter="\t", lineterminator="\n")
            writer.writerows(lines)

    # Write or rewrite formats JSON with new dict
    with open(f"{cogs_dir}/formats.json", "w") as f:
        f.write(json.dumps(id_to_format, sort_keys=True, indent=4))

    # Update local sheets details in sheet.tsv with new IDs & details for current tracked sheets
    all_sheets = get_updated_sheet_details(tracked_sheets, remote_sheets, sheet_frozen)
    removed_titles = remove_sheets(
        cogs_dir, sheets, tracked_sheets, renamed_local, renamed_remote
    )

    # Add renamed-remote
    for old_title, details in renamed_remote.items():
        with open(f"{cogs_dir}/renamed.tsv", "a") as f:
            new_title = details["new"]
            new_path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{new_path}\tremote\n")

    # Rewrite format.tsv and note.tsv with current remote formats & notes
    update_format(cogs_dir, sheet_formats, removed_titles)
    update_note(cogs_dir, sheet_notes, removed_titles)

    # Remove old data validation rules and rewrite with new
    with open(f"{cogs_dir}/validation.tsv", "w") as f:
        f.write("Sheet\tRange\tCondition\tValue\n")
    update_data_validation(cogs_dir, sheet_dv_rules, removed_titles)

    # Then update sheet.tsv
    all_sheets.extend(new_ignore)
    update_sheet(cogs_dir, all_sheets, removed_titles)
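
# Usage sketch; downloads all remote sheets into .cogs/tracked/ and refreshes
# sheet.tsv, format.tsv, note.tsv, and validation.tsv:
#
#     fetch(verbose=True)
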
def add(path, title=None, description=None, freeze_row=0, freeze_column=0, verbose=False):
    """Add a table (TSV or CSV) to the COGS project. This updates sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)

    if path in sheets and sheets[path].get("Ignore") == "True":
        # The provided path is just the title of an ignored sheet
        add_ignored(cogs_dir, sheets, path, None, description=description)
        return
    if title in sheets and sheets[title].get("Ignore") == "True":
        # A path to write the ignored sheet was provided
        add_ignored(cogs_dir, sheets, title, path, description=description)
        return

    if not os.path.exists(path):
        # Create an empty file if the file doesn't exist
        f = open(path, "w")
        f.close()

    if not title:
        # Create the sheet title from file basename
        title = ntpath.basename(path).split(".")[0]

    # Make sure we aren't duplicating a table
    if title in sheets:
        raise AddError(f"'{title}' sheet already exists in this project")

    # Make sure we aren't duplicating a path
    local_paths = {x["Path"]: t for t, x in sheets.items()}
    if path in local_paths.keys():
        other_title = local_paths[path]
        raise AddError(f"Local table {path} already exists as '{other_title}'")

    # Maybe get a description
    if not description:
        description = ""

    # Finally, add this TSV to sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "a") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        # ID gets filled in when we add it to the Sheet
        writer.writerow(
            {
                "ID": "",
                "Title": title,
                "Path": path,
                "Description": description,
                "Frozen Rows": freeze_row,
                "Frozen Columns": freeze_column,
                "Ignore": False,
            }
        )
    logging.info(f"{title} successfully added to project")
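
# Usage sketch (hypothetical path and title):
#
#     add("tables/new_sheet.tsv", title="New Sheet",
#         description="Example sheet", freeze_row=1)
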